{ "cells": [ { "cell_type": "code", "execution_count": 22, "id": "15f4833b", "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "from dotenv import load_dotenv\n", "\n", "from evoagentx.agents.agent_manager import AgentManager\n", "from evoagentx.benchmark import HotPotQA\n", "from evoagentx.core.callbacks import suppress_logger_info\n", "from evoagentx.core.logging import logger\n", "from evoagentx.evaluators import Evaluator\n", "from evoagentx.models import OpenAILLM, OpenAILLMConfig\n", "from evoagentx.optimizers import TextGradOptimizer\n", "from evoagentx.prompts import StringTemplate\n", "from evoagentx.workflow import SequentialWorkFlowGraph\n", "from dotenv import load_dotenv\n", "\n", "from evoagentx.agents.agent_manager import AgentManager\n", "from evoagentx.benchmark import MBPP\n", "from evoagentx.core.callbacks import suppress_logger_info\n", "from evoagentx.core.logging import logger\n", "from evoagentx.evaluators import Evaluator\n", "from evoagentx.models import OpenAILLM, OpenAILLMConfig\n", "from evoagentx.optimizers import TextGradOptimizer\n", "from evoagentx.prompts import StringTemplate\n", "from evoagentx.workflow import SequentialWorkFlowGraph\n", "\n", "from evoagentx.models import OpenAILLMConfig, OpenAILLM\n", "from evoagentx.workflow import SEWWorkFlowGraph, STRUCTUREWorkFlowGraph\n", "from evoagentx.agents import AgentManager\n", "from evoagentx.benchmark import HumanEval,AFlowMBPP\n", "from evoagentx.evaluators import Evaluator \n", "from evoagentx.optimizers import SEWOptimizer, STRUCTUREOptimizer\n", "from evoagentx.optimizers.structure_optimizer import STRUCTUREWorkFlowScheme\n", "from evoagentx.core.callbacks import suppress_logger_info\n", "\n", "from evoagentx.models import OpenAILLMConfig, OpenAILLM,AzureOpenAIConfig,LiteLLMConfig,LiteLLM\n", "from evoagentx.workflow import SEWWorkFlowGraph \n", "from evoagentx.agents import AgentManager\n", "from evoagentx.benchmark import MBPPPLUS, AFlowMBPPPLUS\n", "from 
evoagentx.evaluators import Evaluator \n",
    "from evoagentx.optimizers import SEWOptimizer \n",
    "from evoagentx.core.callbacks import suppress_logger_info\n",
    "from evoagentx.benchmark import HumanEvalPLUS\n",
    "from evoagentx.benchmark import SciCode\n",
    "from evoagentx.benchmark import PertQA\n",
    "from copy import deepcopy\n",
    "\n",
    "import nest_asyncio\n",
    "# Patch asyncio so event loops can be nested (a notebook kernel already runs one).\n",
    "nest_asyncio.apply()\n",
    "\n",
    "class PertQASplits(PertQA):\n",
    "    \"\"\"PertQA benchmark re-split into fixed, non-overlapping train and dev subsets.\"\"\"\n",
    "\n",
    "    def _load_data(self):\n",
    "        \"\"\"Load the 'adamson' data through the parent class, then re-split it.\n",
    "\n",
    "        The dev data loaded by the parent is shuffled with a fixed seed. Up to 50\n",
    "        shuffled examples become the train split and up to the next 50 become\n",
    "        the dev split, so the two never share an example. The unshuffled pool\n",
    "        is kept in ``_fulldata``.\n",
    "        \"\"\"\n",
    "        # load the original data\n",
    "        super()._load_data(pertdata = 'adamson')\n",
    "        # shuffle with a fixed seed so the split is reproducible\n",
    "        import numpy as np \n",
    "        np.random.seed(42)\n",
    "        permutation = np.random.permutation(len(self._dev_data))\n",
    "        full_test_data = self._dev_data \n",
    "        # Train and dev must not share indices: both previously used\n",
    "        # permutation[:50], which made validation run on the training examples.\n",
    "        # Pools smaller than 100 examples are split roughly in half.\n",
    "        n_train = min(50, len(permutation) // 2)\n",
    "        train_idx = permutation[:n_train]\n",
    "        dev_idx = permutation[n_train:n_train + 50]\n",
    "        self._train_data = [full_test_data[idx] for idx in train_idx]\n",
    "        self._dev_data = [full_test_data[idx] for idx in dev_idx]\n",
    "        self._fulldata = full_test_data\n",
    "\n",
    "\n",
    "def collate_func(example: dict) -> dict:\n",
    "    \"\"\"Turn one benchmark example into the input dict expected by the workflow.\n",
    "\n",
    "    Reads ``example[\"question_new\"]`` and returns it under the ``question`` key,\n",
    "    framed as a Question/Answer prompt.\n",
    "    \"\"\"\n",
    "    problem = \"Question: {}\\n\\nAnswer:\".format(example[\"question_new\"])\n",
    "    return {\"question\": problem}\n",
    "\n",
    "\n",
    "# Never hardcode credentials: the key is read from the environment, optionally\n",
    "# populated from a local, git-ignored .env file. The key that used to be committed\n",
    "# in this cell must be treated as compromised and revoked.\n",
    "import os\n",
    "load_dotenv()\n",
    "api_key = os.getenv(\"OPENAI_API_KEY\")\n",
    "if not api_key:\n",
    "    raise RuntimeError(\"OPENAI_API_KEY is not set; export it or add it to a .env file.\")\n",
    "OPENAI_API_KEY = api_key\n",
    "\n",
    "\n",
    "llm_config = OpenAILLMConfig(model=\"gpt-4o-mini-2024-07-18\", openai_key=OPENAI_API_KEY, top_p=0.85, temperature=0.2, frequency_penalty=0.0, presence_penalty=0.0)\n",
    "llm = OpenAILLM(config=llm_config)\n",
    "# Alternative Azure/LiteLLM backend, kept for reference. Supply the key through the\n",
    "# environment; never paste the real value into the notebook.\n",
    "# os.environ[\"AZURE_OPENAI_DEPLOYMENT_NAME\"] = \"gpt-4o-mini\"\n",
    "# os.environ[\"AZURE_OPENAI_ENDPOINT\"] = \"https://tianyuliu-hua-raredisea-resource.cognitiveservices.azure.com/\"\n",
    "# os.environ[\"AZURE_OPENAI_KEY\"] = \"<your-azure-openai-key>\"\n",
    "# os.environ[\"AZURE_OPENAI_API_VERSION\"] = \"2025-01-01-preview\"\n",
    "# llm_config = LiteLLMConfig(model=\"azure/\" + os.getenv(\"AZURE_OPENAI_DEPLOYMENT_NAME\"), # Azure model format\n",
    "#     azure_endpoint=os.getenv(\"AZURE_OPENAI_ENDPOINT\"),\n",
    "#     azure_key=os.getenv(\"AZURE_OPENAI_KEY\"),\n",
    "#     api_version=os.getenv(\"AZURE_OPENAI_API_VERSION\", \"2024-12-01-preview\"), top_p=0.85, temperature=0.2, frequency_penalty=0.0, presence_penalty=0.0)\n",
    "\n",
    "# executor_llm = LiteLLM(config=llm_config)\n",
    "# optimizer_llm = LiteLLM(config=llm_config)\n",
    "# llm = executor_llm" ] },
  { "cell_type": "code", "execution_count": 23, "id": "d954f709", "metadata": {}, "outputs": [], "source": [
    "# hotpotqa_graph_data = {\n",
    "#     \"goal\": \"Provide a direct answer to the question based on the context, without including explanations or reasoning.\",\n",
    "#     \"tasks\": [\n",
    "#         {\n",
    "#             \"name\": \"answer_generate\",\n",
    "#             \"description\": \"Generate a direct answer to the question based on the context.\",\n",
    "#             \"inputs\": [\n",
    "#                 {\"name\": \"question\", \"type\": \"str\", \"required\": True, \"description\": \"The question to answer directly.\"}\n",
    "#             ],\n",
    "#             \"outputs\": [\n",
    "#                 {\"name\": \"answer\", \"type\": \"str\", \"required\": True, \"description\": \"The direct answer to the question.\"}\n",
    "#             ],\n",
    "#             \"prompt_template\": StringTemplate(instruction=\"Think step by step to answer the question. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field. You answer could be only Yes or NO.\\nFormat your output in xml format, such as xxx and xxx.\"),\n",
    "#             \"parse_mode\": \"xml\"\n",
    "#         }\n",
    "#     ] \n",
    "# }\n",
    "\n",
    "#generated_workflow\n",
    "hotpotqa_graph_data = {\n",
    "    \"goal\": \"Provide a concise answer to the question using relevant context. 
The answer must be straightforward and avoid unnecessary explanations.\",\n",
    "    # A single task: it receives the 'question' string built by collate_func and emits 'answer'.\n",
    "    \"tasks\": [\n",
    "        {\n",
    "            \"name\": \"generate_answer\",\n",
    "            \"description\": \"Extract and formulate an answer from the given context.\",\n",
    "            \"inputs\": [\n",
    "                {\"name\": \"question\", \"type\": \"str\", \"required\": True, \"description\": \"The question that needs to be answered.\"},\n",
    "            ],\n",
    "            \"outputs\": [\n",
    "                {\"name\": \"answer\", \"type\": \"str\", \"required\": True, \"description\": \"The direct answer to the question.\"}\n",
    "            ],\n",
    "            \"prompt_template\": StringTemplate(instruction=\"Use the context to determine the best answer to the question. Provide your final answer in a clear format, without extra commentary or reasoning.\"),\n",
    "            # NOTE(review): parse_mode is 'xml' but the instruction above never asks for\n",
    "            # XML-tagged output - confirm the parser copes with untagged answers.\n",
    "            \"parse_mode\": \"xml\"\n",
    "        }\n",
    "    ]\n",
    "}" ] },
  { "cell_type": "code", "execution_count": 24, "id": "a3bcfc25", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:31:56.285\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.tools.storage_handler\u001b[0m:\u001b[36m_initialize_storage\u001b[0m:\u001b[36m133\u001b[0m - \u001b[1mLocal storage initialized with base path: .\u001b[0m\n" ] } ], "source": [
    "# Search toolkits (Wikipedia, arXiv, web search) that a later cell passes to AgentManager.\n",
    "from evoagentx.benchmark import HotPotQA\n",
    "from evoagentx.tools import ArxivToolkit\n",
    "import evoagentx.tools\n",
    "wiki_toolkit = evoagentx.tools.WikipediaSearchToolkit(max_summary_sentences=5)\n",
    "arxiv_toolkit = evoagentx.tools.ArxivToolkit()\n",
    "search_toolkit = evoagentx.tools.DDGSSearchToolkit( num_search_pages=5,\n",
    "    max_content_words=300,\n",
    "    backend=\"auto\", # Options: \"auto\", \"duckduckgo\", \"google\", \"bing\", \"brave\", \"yahoo\"\n",
    "    region=\"us-en\" # Language and region settings\n",
    "    )" ] },
  { "cell_type": "code", "execution_count": 25, "id": "a962ae1e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:31:57.083\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mevoagentx.benchmark.pertqa\u001b[0m:\u001b[36m_load_data_from_file\u001b[0m:\u001b[36m52\u001b[0m - \u001b[1mloading HotPotQA data from /home/tl688/pitl688/selfevolve/EvoAgentX/examples/pertqa/adamson_train.json ...\u001b[0m\n", "\u001b[32m2026-01-01 17:31:57.084\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.benchmark.pertqa\u001b[0m:\u001b[36m_load_data_from_file\u001b[0m:\u001b[36m52\u001b[0m - \u001b[1mloading HotPotQA data from /home/tl688/pitl688/selfevolve/EvoAgentX/examples/pertqa/adamson_train.json ...\u001b[0m\n", "\u001b[32m2026-01-01 17:31:57.085\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.benchmark.pertqa\u001b[0m:\u001b[36m_load_data_from_file\u001b[0m:\u001b[36m52\u001b[0m - \u001b[1mloading HotPotQA data from /home/tl688/pitl688/selfevolve/EvoAgentX/examples/pertqa/adamson_test.json ...\u001b[0m\n" ] } ], "source": [
    "# llm_config = OpenAILLMConfig(model=\"gpt-4.1-mini-2025-04-14\", openai_key=OPENAI_API_KEY, top_p=0.85, temperature=0.2, frequency_penalty=0.0, presence_penalty=0.0)\n",
    "# llm = OpenAILLM(config=llm_config)\n",
    "\n",
    "# obtain SEW workflow \n",
    "# sew_graph = SEWWorkFlowGraph.from_dict(hotpotqa_graph_data)\n",
    "# agent_manager = AgentManager()\n",
    "# agent_manager.add_agents_from_workflow(sew_graph, executor_llm.config)\n",
    "# obtain SEW workflow \n",
    "# sew_graph = QASTRUCTUREWorkFlowGraph.from_dict(hotpotqa_graph_data)\n",
    "\n",
    "# Active setup: benchmark, workflow graph, tool-equipped agents and the evaluator.\n",
    "# NOTE(review): PertQA() is used here rather than the PertQASplits subclass defined in\n",
    "# the first cell - confirm that is intended.\n",
    "benchmark = PertQA()\n",
    "# Despite its name, sew_graph is a SequentialWorkFlowGraph built from hotpotqa_graph_data.\n",
    "sew_graph = SequentialWorkFlowGraph.from_dict(hotpotqa_graph_data)\n",
    "agent_manager = AgentManager(tools=[search_toolkit,wiki_toolkit,arxiv_toolkit])\n",
    "agent_manager.add_agents_from_workflow(sew_graph, llm_config=llm_config)\n",
    "evaluator = Evaluator(llm=llm, agent_manager=agent_manager, collate_func=collate_func, num_workers=20, verbose=True)" ] },
  { "cell_type": "code", "execution_count": 26, "id": "656b3c46", "metadata": {}, "outputs": [], "source": [
    "from evoagentx.optimizers import QASTRUCTUREOptimizer, TextGradOptimizer" ] },
  { "cell_type": "code", "execution_count": 27, "id": "4318bce0", "metadata": { "scrolled": true }, "outputs": [], "source": [
    "# graph = QASTRUCTUREOptimizer.load_module(\"./debug/save_10_noreason.json\")\n",
    "# SequentialWorkFlowGraph.from_dict(graph['graph'])" ] },
  { "cell_type": "code", "execution_count": 28, "id": "eaea09d1", "metadata": {}, "outputs": [], "source": [
    "# graph" ] },
  { "cell_type": "code", "execution_count": 29, "id": "227fc475", "metadata": { "scrolled": true }, "outputs": [], "source": [
    "# Evaluator re-created with the same arguments as in the earlier setup cell.\n",
    "evaluator = Evaluator(llm=llm, agent_manager=agent_manager, collate_func=collate_func, num_workers=20, verbose=True)\n",
    "# Build the QASTRUCTUREOptimizer over the sequential workflow graph.\n",
    "optimizer = QASTRUCTUREOptimizer(\n",
    "    graph=sew_graph, \n",
    "    evaluator=evaluator, \n",
    "    llm=llm, \n",
    "    max_steps=30,\n",
    "    eval_rounds=1, \n",
    "    repr_scheme=\"python\", \n",
    "    optimize_mode=\"all\", \n",
    "    order=\"zero-order\",\n",
    "    max_rounds=1\n",
    ")\n",
    "# NOTE(review): the attributes below are assigned ad hoc after construction; their\n",
    "# meaning is defined inside evoagentx and is not visible here - confirm before changing.\n",
    "optimizer.calltime = 1\n",
    "optimizer.collate_func = collate_func\n",
    "\n",
    "benchmark.error_list = {}\n",
    "benchmark.timeout = 900\n",
    "# NOTE(review): dataname is 'pubmedxqa' although the benchmark object is PertQA - verify.\n",
    "benchmark.dataname = 'pubmedxqa'" ] },
  { "cell_type": "code", "execution_count": 30, "id": "019bb9e5", "metadata": { "scrolled": true }, "outputs": [], "source": [
    "# optimizer.evaluator.dataname = 'hotpotqa'\n",
    "# with suppress_logger_info():\n",
    "#     metrics = optimizer.evaluate(dataset=benchmark, eval_mode=\"test\")\n",
    "# print(\"Evaluation metrics: \", metrics)" ] },
  { "cell_type": "code", "execution_count": 31, "id": "3984171e", "metadata": {}, "outputs": [], "source": [
    "# metrics\n",
    "# # metrics" ] },
  { "cell_type": "code", "execution_count": null, "id": "7767f030", "metadata": { "scrolled": true }, "outputs": [], "source": [] },
  { "cell_type": "code", "execution_count": 32, "id": "c0648c81", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:32:07.755\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1011\u001b[0m - \u001b[1mOptimizing the SequentialWorkFlowGraph workflow with python representation.\u001b[0m\n", "\u001b[32m2026-01-01 17:32:07.756\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1015\u001b[0m - \u001b[1mRun initial evaluation on the original workflow ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 1%|▏ | 1/80 [00:03<04:18, 3.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▎ | 2/80 [00:05<03:12, 2.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 3/80 [00:05<02:11, 1.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 4/80 [00:08<02:35, 2.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▋ | 5/80 [00:10<02:26, 1.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 6/80 [00:11<02:13, 1.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 7/80 [00:13<02:10, 1.79s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 8/80 [00:15<02:08, 1.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█▏ | 9/80 [00:16<01:43, 1.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▎ | 10/80 [00:16<01:25, 1.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 11/80 [00:20<02:09, 1.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 12/80 [00:20<01:45, 1.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▋ | 13/80 [00:21<01:32, 1.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 14/80 [00:23<01:26, 1.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 15/80 [00:23<01:13, 1.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": 
[ "\r", "Evaluating workflow: 20%|██ | 16/80 [00:24<01:09, 1.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██▏ | 17/80 [00:25<01:02, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▎ | 18/80 [00:26<00:58, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 19/80 [00:27<00:57, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 20/80 [00:28<01:01, 1.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▋ | 21/80 [00:29<00:59, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 22/80 [00:30<01:04, 1.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 23/80 [00:31<00:59, 1.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 24/80 [00:32<00:58, 1.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": 
[ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███▏ | 25/80 [00:33<00:54, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▎ | 26/80 [00:34<00:48, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 27/80 [00:35<00:43, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 28/80 [00:35<00:44, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▋ | 29/80 [00:36<00:42, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 30/80 [00:37<00:40, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 31/80 [00:38<00:43, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 32/80 [00:39<00:42, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", 
"Evaluating workflow: 41%|████▏ | 33/80 [00:40<00:44, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▎ | 34/80 [00:41<00:41, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 35/80 [00:42<00:42, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 36/80 [00:43<00:42, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▋ | 37/80 [00:45<00:52, 1.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 38/80 [00:45<00:44, 1.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 39/80 [00:46<00:41, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 40/80 [00:47<00:41, 1.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████▏ | 41/80 [00:48<00:39, 1.01s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▎ | 42/80 [00:49<00:35, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 43/80 [00:50<00:33, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 44/80 [00:51<00:31, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▋ | 45/80 [00:51<00:28, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▊ | 46/80 [00:52<00:27, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 47/80 [00:53<00:28, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 48/80 [00:54<00:30, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████▏ | 49/80 [00:55<00:29, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▎ | 50/80 [00:56<00:27, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 51/80 [00:57<00:26, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 52/80 [00:58<00:25, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▋ | 53/80 [00:59<00:26, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 54/80 [01:00<00:24, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 55/80 [01:02<00:30, 1.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 56/80 [01:03<00:27, 1.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████▏ | 57/80 [01:04<00:24, 1.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▎ | 58/80 
[01:05<00:22, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 59/80 [01:05<00:19, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 60/80 [01:06<00:20, 1.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▋ | 61/80 [01:07<00:18, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 62/80 [01:08<00:16, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 63/80 [01:09<00:16, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 64/80 [01:10<00:14, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████▏ | 65/80 [01:11<00:14, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▎ | 66/80 [01:12<00:14, 1.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 67/80 [01:13<00:12, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 68/80 [01:14<00:10, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▋ | 69/80 [01:15<00:09, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 70/80 [01:16<00:08, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 71/80 [01:18<00:11, 1.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 72/80 [01:18<00:08, 1.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████▏| 73/80 [01:19<00:06, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▎| 74/80 [01:20<00:06, 1.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 75/80 [01:21<00:04, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 76/80 [01:22<00:03, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▋| 77/80 [01:23<00:03, 1.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 78/80 [01:24<00:01, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 79/80 [01:25<00:00, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 80/80 [01:26<00:00, 1.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-01 17:33:34.371\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1019\u001b[0m - \u001b[1mInitial metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.9125}\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:33:37.530\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: 
$2.760 | Total tokens: 13251421 | Current cost: $0.001 | Current tokens: 7985\u001b[0m\n", "- The workflow lacks validation steps to ensure that the generated answers are consistent with the predictions and solutions, leading to potential discrepancies.\n", "- There are multiple instances where the predictions and solutions are inconsistent, particularly with the questions regarding perturbations of MRGBP, SOCS1, DDIT3, and HSD17B12, indicating a failure in the computational logic or data interpretation.\n", "- The workflow does not account for the context of each perturbation adequately, which may lead to incorrect assumptions about the significance of expression changes.\n", "- The structure of the workflow is overly simplistic, relying solely on a single step to generate answers without considering the need for intermediate evaluations or checks.\n", "- The repeated use of the same question format without variation may lead to ambiguity in understanding the specific context of each perturbation, potentially affecting the quality of the answers.\n", "\u001b[32m2026-01-01 17:33:38.969\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.760 | Total tokens: 13252075 | Current cost: $0.000 | Current tokens: 654\u001b[0m\n", "```python\n", "steps = [\n", " {'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", " {'name': 'validate_answer', 'args': ['answer'], 'outputs': ['validated_answer']},\n", " {'name': 'contextualize_answer', 'args': ['validated_answer', 'question'], 'outputs': ['final_answer']}\n", "]\n", "```\n", "Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IER3IP1 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TTI1 is perturbed and the expression of CD52 is measured. Determine whether CD52 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which BHLHE40 is perturbed and SESN2 expression is observed. Does this perturbation lead to a significant difference in SESN2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TTI1 is perturbed and the expression of RHCE is measured. Determine whether RHCE shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, EIF2B2 is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. 
For K562 cells subjected to perturbation of COPZ1, does the expression profile of SH3BGRL3 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SRP68 and then measure expression of PPCS. Does this perturbation cause a significant change in PPCS expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb MRGBP and examine the expression of RPS27. Does perturbing MRGBP lead to a significant change in RPS27 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SPCS3, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb TTI1 and examine the expression of RHCE. Does perturbing TTI1 lead to a significant change in RHCE expression? 
Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, TMEM167A is perturbed and the expression of CD52 is measured. Does this perturbation cause a significant change in CD52 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, P4HB is perturbed and RHCE expression is quantified. Does this perturbation result in a significant change in RHCE expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, HSD17B12 is perturbed and CD52 expression is measured. Determine whether CD52 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DDIT3 and examine the expression of RGS16. Does perturbing DDIT3 lead to a significant change in RGS16 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. 
In K562 cells, we introduce a perturbation to TIMM23 and then measure expression of SH3BGRL3. Does this perturbation cause a significant change in SH3BGRL3 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of HARS, does the expression profile of PHGDH indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, PDIA6 is perturbed and SNHG12 expression is measured. Determine whether SNHG12 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ASCC3 is associated with a significant change in SH3BGRL3 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, IER3IP1 is perturbed and CLCA1 expression is measured. Determine whether CLCA1 exhibits a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of TELO2, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PDIA6 is perturbed and YTHDF2 expression is observed. Does this perturbation lead to a significant difference in YTHDF2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, P4HB is perturbed and RHCE expression is measured. Determine whether RHCE exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of GMPPB, does the expression profile of RHCE indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. 
For experiments performed in K562 cells, ZNF326 is perturbed and ZNF326 expression is measured. Determine whether ZNF326 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SYVN1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IDH3A is perturbed and SNHG12 expression is observed. Does this perturbation lead to a significant difference in SNHG12 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DNAJC19 is perturbed and PHGDH expression is quantified. Does this perturbation result in a significant change in PHGDH expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to UFL1 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? 
Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, STT3A is perturbed and the expression of FCER1G is measured. Determine whether FCER1G shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of PPWD1 is associated with a significant change in CD52 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: No\n", "Solutions: Yes\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PPWD1 is perturbed and CLCA1 expression is observed. Does this perturbation lead to a significant difference in CLCA1 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SPCS2 is perturbed and the expression of SH3BGRL3 is measured. Determine whether SH3BGRL3 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. 
For K562 cells subjected to perturbation of ARHGAP22, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SRP68 is perturbed and FCER1G expression is quantified. Does this perturbation result in a significant change in FCER1G expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of MARS, does the expression profile of FAM129A indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to DERL2 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of SEL1L is associated with a significant change in TXNIP expression compared with unperturbed controls. 
Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, SLMO2 is perturbed and the expression of FCGR2A is measured. Does this perturbation cause a significant change in FCGR2A expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TIMM23 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb NEDD8 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SLC35B1 and examine the expression of RHCE. Does perturbing SLC35B1 lead to a significant change in RHCE expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. 
For K562 cells, assess whether perturbation of ZNF326 is associated with a significant change in RGS16 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which SRPRB is perturbed and PPCS expression is observed. Does this perturbation lead to a significant difference in PPCS expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SOCS1 is perturbed and RPS27 expression is measured. Determine whether RPS27 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which AMIGO3 is perturbed and TXNIP expression is observed. Does this perturbation lead to a significant difference in TXNIP expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb DAD1 and monitor SNHG12 expression. Decide whether this perturbation leads to a significant alteration in SNHG12 expression. 
Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SYVN1 is perturbed and YTHDF2 expression is quantified. Does this perturbation result in a significant change in YTHDF2 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of COPB1, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which ATP5B is perturbed and FCER1G expression is observed. Does this perturbation lead to a significant difference in FCER1G expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SAMM50, does the expression profile of FCGR2A indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. 
In K562 cells, we introduce a perturbation to SLMO2 and then measure expression of PHGDH. Does this perturbation cause a significant change in PHGDH expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TMED2 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SEC63 is perturbed and CD52 expression is measured. Determine whether CD52 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TIMM23 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, SLC35B1 is perturbed and the expression of FCER1G is measured. Does this perturbation cause a significant change in FCER1G expression? 
Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, GBF1 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb IER3IP1 and monitor FCER1G expression. Decide whether this perturbation leads to a significant alteration in FCER1G expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb FARSB and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, ARHGAP22 is perturbed and the expression of SH3BGRL3 is measured. Does this perturbation cause a significant change in SH3BGRL3 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. 
In K562 cells, UFM1 is perturbed and the expression of CLCA1 is measured. Does this perturbation cause a significant change in CLCA1 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to CARS and then measure expression of FCGR2A. Does this perturbation cause a significant change in FCGR2A expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb PSMD4 and monitor PSMD4 expression. Decide whether this perturbation leads to a significant alteration in PSMD4 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to MRGBP and then measure expression of RPS27. Does this perturbation cause a significant change in RPS27 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, KCTD16 is perturbed and the expression of SNHG12 is measured. Does this perturbation cause a significant change in SNHG12 expression? 
Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, BHLHE40 is perturbed and SESN2 expression is measured. Determine whether SESN2 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SCYL1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SEC63 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SRP72 and examine the expression of RPS27. Does perturbing SRP72 lead to a significant change in RPS27 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene regulation studies. 
For experiments performed in K562 cells, CARS is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, CHERP is perturbed and NPL expression is quantified. Does this perturbation result in a significant change in NPL expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb SAMM50 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DDIT3 is perturbed and RPS27 expression is quantified. Does this perturbation result in a significant change in RPS27 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb HYOU1 and examine the expression of SNHG12. Does perturbing HYOU1 lead to a significant change in SNHG12 expression? 
Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, NEDD8 is perturbed and FCGR2A expression is quantified. Does this perturbation result in a significant change in FCGR2A expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DAD1 and examine the expression of CLCA1. Does perturbing DAD1 lead to a significant change in CLCA1 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, HSD17B12 is perturbed and FCGR2A expression is quantified. Does this perturbation result in a significant change in FCGR2A expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: No\n", "Solutions: Yes\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of IER3IP1, does the expression profile of FCER1G indicate a significant change relative to control conditions? 
Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SRPRB is perturbed and the expression of PPCS is measured. Determine whether PPCS shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SAMM50 and examine the expression of FCGR2A. Does perturbing SAMM50 lead to a significant change in FCGR2A expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, IARS2 is perturbed and the expression of PHGDH is measured. Determine whether PHGDH shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': None, 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. 
Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:33:41.840\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.761 | Total tokens: 13260089 | Current cost: $0.001 | Current tokens: 8014\u001b[0m\n", "\u001b[32m2026-01-01 17:33:43.626\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.761 | Total tokens: 13260207 | Current cost: $0.000 | Current tokens: 118\u001b[0m\n", "\u001b[32m2026-01-01 17:33:45.145\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.761 | Total tokens: 13260724 | Current cost: $0.000 | Current tokens: 517\u001b[0m\n", "{'name': 'validate_answer2087', 'description': 'Task to validate_answer2087. Takes answer as input. Produces validated_answer as output.', 'inputs': [{'name': 'answer', 'type': 'str', 'description': 'Input parameter answer for validate_answer2087', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_answer2087', 'required': True}], 'prompt': 'Your are a task solver.', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:33:47.685\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.762 | Total tokens: 13268765 | Current cost: $0.001 | Current tokens: 8041\u001b[0m\n", "\u001b[32m2026-01-01 17:33:48.701\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.762 | Total tokens: 13268869 | Current cost: $0.000 | Current tokens: 104\u001b[0m\n", "\u001b[32m2026-01-01 17:33:50.117\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.763 | Total tokens: 13269431 | Current cost: $0.000 | Current tokens: 562\u001b[0m\n", "{'name': 'contextualize_answer4593', 'description': 'Task to contextualize_answer4593. Takes validated_answer, question as input. Produces final_answer as output.', 'inputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Input parameter validated_answer for contextualize_answer4593', 'required': False}, {'name': 'question', 'type': 'str', 'description': 'Input parameter question for contextualize_answer4593', 'required': False}], 'outputs': [{'name': 'final_answer', 'type': 'str', 'description': 'Output parameter final_answer from contextualize_answer4593', 'required': True}], 'prompt': 'Your are a task solver.', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. 
You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:33:52.565\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.764 | Total tokens: 13277480 | Current cost: $0.001 | Current tokens: 8049\u001b[0m\n", "\u001b[32m2026-01-01 17:33:53.392\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.764 | Total tokens: 13277576 | Current cost: $0.000 | Current tokens: 96\u001b[0m\n", "\u001b[32m2026-01-01 17:33:55.044\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.764 | Total tokens: 13278136 | Current cost: $0.000 | Current tokens: 560\u001b[0m\n", "\u001b[32m2026-01-01 17:33:55.044\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1049\u001b[0m - \u001b[1mEvaluate the workflow at step 1 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 1%|▏ | 1/80 [00:00<01:06, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▎ | 2/80 [00:01<01:00, 1.28it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 4%|▍ | 3/80 [00:02<01:09, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 4/80 [00:03<01:19, 1.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▋ | 5/80 [00:04<01:13, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 6/80 [00:05<01:16, 1.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 7/80 [00:06<01:07, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 8/80 [00:10<02:18, 1.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█▏ | 9/80 [00:11<01:51, 1.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▎ | 10/80 [00:12<01:32, 1.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 11/80 [00:12<01:18, 1.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 
0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 12/80 [00:13<01:10, 1.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▋ | 13/80 [00:15<01:15, 1.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 14/80 [00:16<01:11, 1.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 15/80 [00:16<01:07, 1.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 16/80 [00:18<01:08, 1.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██▏ | 17/80 [00:18<01:00, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▎ | 18/80 [00:19<00:55, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 19/80 [00:20<00:54, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 20/80 
[00:21<00:51, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▋ | 21/80 [00:22<00:56, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 22/80 [00:23<00:56, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 23/80 [00:24<00:50, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 24/80 [00:25<00:50, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███▏ | 25/80 [00:26<00:54, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▎ | 26/80 [00:27<00:51, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 27/80 [00:28<00:50, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 28/80 [00:28<00:47, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▋ | 29/80 [00:29<00:44, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 30/80 [00:30<00:41, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 31/80 [00:31<00:40, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 32/80 [00:32<00:38, 1.24it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████▏ | 33/80 [00:33<00:42, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▎ | 34/80 [00:34<00:41, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 35/80 [00:35<00:49, 1.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 36/80 [00:36<00:46, 1.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▋ | 37/80 
[00:37<00:41, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 38/80 [00:38<00:38, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 39/80 [00:39<00:40, 1.00it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 40/80 [00:40<00:38, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████▏ | 41/80 [00:40<00:35, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▎ | 42/80 [00:42<00:37, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 43/80 [00:42<00:33, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 44/80 [00:43<00:30, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▋ | 45/80 [00:44<00:29, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 
'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▊ | 46/80 [00:45<00:31, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 47/80 [00:46<00:31, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 48/80 [00:47<00:31, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████▏ | 49/80 [00:48<00:32, 1.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▎ | 50/80 [00:49<00:32, 1.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 51/80 [00:50<00:29, 1.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 52/80 [00:51<00:28, 1.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▋ | 53/80 [00:53<00:29, 1.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", 
"Evaluating workflow: 68%|██████▊ | 54/80 [00:53<00:26, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 55/80 [00:54<00:24, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 56/80 [00:55<00:23, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████▏ | 57/80 [00:56<00:21, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▎ | 58/80 [00:57<00:19, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 59/80 [00:58<00:18, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 60/80 [00:59<00:17, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▋ | 61/80 [01:00<00:16, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 62/80 [01:00<00:15, 1.17it/s]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 63/80 [01:01<00:14, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 64/80 [01:02<00:13, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████▏ | 65/80 [01:04<00:15, 1.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▎ | 66/80 [01:04<00:13, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 67/80 [01:05<00:11, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 68/80 [01:06<00:10, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▋ | 69/80 [01:07<00:10, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 70/80 [01:08<00:09, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 71/80 [01:09<00:08, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 72/80 [01:10<00:07, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████▏| 73/80 [01:11<00:06, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▎| 74/80 [01:12<00:06, 1.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 75/80 [01:13<00:04, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 76/80 [01:14<00:03, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▋| 77/80 [01:15<00:03, 1.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 78/80 [01:16<00:02, 1.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", 
"Evaluating workflow: 99%|█████████▉| 79/80 [01:17<00:01, 1.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 80/80 [01:18<00:00, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-01 17:35:13.898\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1052\u001b[0m - \u001b[1mStep 1 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.9375}\u001b[0m\n", "randomly update dataset\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:35:16.903\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.773 | Total tokens: 13335252 | Current cost: $0.001 | Current tokens: 8018\u001b[0m\n", "- The workflow lacks a validation step after generating the answer, which could lead to incorrect outputs being used in subsequent steps.\n", "- There is no explicit handling of cases where the answer may not fit the expected format ('Final Answer: Yes' or 'Final Answer: No'), which could result in ambiguous or incorrect responses.\n", "- The control flow does not account for potential errors in the validation step, leading to unhandled cases if the answer is not validated correctly.\n", "- The workflow assumes that all generated answers will be valid without any checks for logical consistency or relevance to the question, which may lead to misleading conclusions.\n", "- The execution history shows multiple instances where the predictions and solutions diverge, indicating a lack of robustness in the answer generation process.\n", "\u001b[32m2026-01-01 
17:35:18.443\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.773 | Total tokens: 13335940 | Current cost: $0.000 | Current tokens: 688\u001b[0m\n", "```python\n", "steps = [\n", "{'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", "{'name': 'validate_answer2087', 'args': ['answer'], 'outputs': ['validated_answer']},\n", "{'name': 'contextualize_answer4593', 'args': ['validated_answer', 'question'], 'outputs': ['final_answer']}\n", "]\n", "```\n", "Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IER3IP1 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TTI1 is perturbed and the expression of CD52 is measured. Determine whether CD52 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which BHLHE40 is perturbed and SESN2 expression is observed. Does this perturbation lead to a significant difference in SESN2 expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TTI1 is perturbed and the expression of RHCE is measured. Determine whether RHCE shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, EIF2B2 is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of COPZ1, does the expression profile of SH3BGRL3 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SRP68 and then measure expression of PPCS. Does this perturbation cause a significant change in PPCS expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. 
For experiments carried out in K562 cells, we perturb MRGBP and examine the expression of RPS27. Does perturbing MRGBP lead to a significant change in RPS27 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SPCS3, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb TTI1 and examine the expression of RHCE. Does perturbing TTI1 lead to a significant change in RHCE expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, TMEM167A is perturbed and the expression of CD52 is measured. Does this perturbation cause a significant change in CD52 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, P4HB is perturbed and RHCE expression is quantified. Does this perturbation result in a significant change in RHCE expression compared with control cells? 
Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, HSD17B12 is perturbed and CD52 expression is measured. Determine whether CD52 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DDIT3 and examine the expression of RGS16. Does perturbing DDIT3 lead to a significant change in RGS16 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to TIMM23 and then measure expression of SH3BGRL3. Does this perturbation cause a significant change in SH3BGRL3 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of HARS, does the expression profile of PHGDH indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. 
For experiments performed in K562 cells, PDIA6 is perturbed and SNHG12 expression is measured. Determine whether SNHG12 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ASCC3 is associated with a significant change in SH3BGRL3 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, IER3IP1 is perturbed and CLCA1 expression is measured. Determine whether CLCA1 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of TELO2, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PDIA6 is perturbed and YTHDF2 expression is observed. Does this perturbation lead to a significant difference in YTHDF2 expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, P4HB is perturbed and RHCE expression is measured. Determine whether RHCE exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of GMPPB, does the expression profile of RHCE indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, ZNF326 is perturbed and ZNF326 expression is measured. Determine whether ZNF326 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SYVN1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. 
Consider data from K562 cells in which IDH3A is perturbed and SNHG12 expression is observed. Does this perturbation lead to a significant difference in SNHG12 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DNAJC19 is perturbed and PHGDH expression is quantified. Does this perturbation result in a significant change in PHGDH expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to UFL1 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, STT3A is perturbed and the expression of FCER1G is measured. Determine whether FCER1G shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of PPWD1 is associated with a significant change in CD52 expression compared with unperturbed controls. 
Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PPWD1 is perturbed and CLCA1 expression is observed. Does this perturbation lead to a significant difference in CLCA1 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SPCS2 is perturbed and the expression of SH3BGRL3 is measured. Determine whether SH3BGRL3 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of ARHGAP22, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SRP68 is perturbed and FCER1G expression is quantified. Does this perturbation result in a significant change in FCER1G expression compared with control cells? 
Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of MARS, does the expression profile of FAM129A indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to DERL2 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of SEL1L is associated with a significant change in TXNIP expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, SLMO2 is perturbed and the expression of FCGR2A is measured. Does this perturbation cause a significant change in FCGR2A expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. 
In K562 cells, TIMM23 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb NEDD8 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SLC35B1 and examine the expression of RHCE. Does perturbing SLC35B1 lead to a significant change in RHCE expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ZNF326 is associated with a significant change in RGS16 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which SRPRB is perturbed and PPCS expression is observed. Does this perturbation lead to a significant difference in PPCS expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SOCS1 is perturbed and RPS27 expression is measured. Determine whether RPS27 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which AMIGO3 is perturbed and TXNIP expression is observed. Does this perturbation lead to a significant difference in TXNIP expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb DAD1 and monitor SNHG12 expression. Decide whether this perturbation leads to a significant alteration in SNHG12 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SYVN1 is perturbed and YTHDF2 expression is quantified. Does this perturbation result in a significant change in YTHDF2 expression compared with control cells? 
Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of COPB1, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which ATP5B is perturbed and FCER1G expression is observed. Does this perturbation lead to a significant difference in FCER1G expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SAMM50, does the expression profile of FCGR2A indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SLMO2 and then measure expression of PHGDH. Does this perturbation cause a significant change in PHGDH expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. 
Consider data from K562 cells in which TMED2 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SEC63 is perturbed and CD52 expression is measured. Determine whether CD52 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TIMM23 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, SLC35B1 is perturbed and the expression of FCER1G is measured. Does this perturbation cause a significant change in FCER1G expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, GBF1 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb IER3IP1 and monitor FCER1G expression. Decide whether this perturbation leads to a significant alteration in FCER1G expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb FARSB and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, ARHGAP22 is perturbed and the expression of SH3BGRL3 is measured. Does this perturbation cause a significant change in SH3BGRL3 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, UFM1 is perturbed and the expression of CLCA1 is measured. Does this perturbation cause a significant change in CLCA1 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. 
In K562 cells, we introduce a perturbation to CARS and then measure expression of FCGR2A. Does this perturbation cause a significant change in FCGR2A expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb PSMD4 and monitor PSMD4 expression. Decide whether this perturbation leads to a significant alteration in PSMD4 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to MRGBP and then measure expression of RPS27. Does this perturbation cause a significant change in RPS27 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, KCTD16 is perturbed and the expression of SNHG12 is measured. Does this perturbation cause a significant change in SNHG12 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, BHLHE40 is perturbed and SESN2 expression is measured. Determine whether SESN2 exhibits a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SCYL1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SEC63 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SRP72 and examine the expression of RPS27. Does perturbing SRP72 lead to a significant change in RPS27 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, CARS is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, CHERP is perturbed and NPL expression is quantified. Does this perturbation result in a significant change in NPL expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb SAMM50 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DDIT3 is perturbed and RPS27 expression is quantified. Does this perturbation result in a significant change in RPS27 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb HYOU1 and examine the expression of SNHG12. Does perturbing HYOU1 lead to a significant change in SNHG12 expression? 
Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, NEDD8 is perturbed and FCGR2A expression is quantified. Does this perturbation result in a significant change in FCGR2A expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DAD1 and examine the expression of CLCA1. Does perturbing DAD1 lead to a significant change in CLCA1 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, HSD17B12 is perturbed and FCGR2A expression is quantified. Does this perturbation result in a significant change in FCGR2A expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of IER3IP1, does the expression profile of FCER1G indicate a significant change relative to control conditions? 
Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SRPRB is perturbed and the expression of PPCS is measured. Determine whether PPCS shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SAMM50 and examine the expression of FCGR2A. Does perturbing SAMM50 lead to a significant change in FCGR2A expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, IARS2 is perturbed and the expression of PHGDH is measured. Determine whether PHGDH shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': '\"\"\"\\nUse the context from {question} to determine the best answer. Ensure that the answer is validated for accuracy before proceeding. 
Provide your final answer in a clear format, without extra commentary or reasoning.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:35:21.622\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.774 | Total tokens: 13343939 | Current cost: $0.001 | Current tokens: 7999\u001b[0m\n", "\u001b[32m2026-01-01 17:35:23.058\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.775 | Total tokens: 13344035 | Current cost: $0.000 | Current tokens: 96\u001b[0m\n", "\u001b[32m2026-01-01 17:35:24.410\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.775 | Total tokens: 13344593 | Current cost: $0.000 | Current tokens: 558\u001b[0m\n", "{'name': 'validate_answer2087', 'description': 'Task to validate_answer2087. Takes answer as input. Produces validated_answer as output.', 'inputs': [{'name': 'answer', 'type': 'str', 'description': 'Input parameter answer for validate_answer2087', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_answer2087', 'required': True}], 'prompt': '\"\"\"\\nThink step by step to answer the question based on the context provided in {question}. First, validate the context to ensure accuracy. 
If the answer is uncertain or ambiguous, clearly state that. After validation, generate a straightforward answer that directly addresses {question}. Format your output in XML, using to explain your reasoning and for the final response.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:35:27.532\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.776 | Total tokens: 13352662 | Current cost: $0.001 | Current tokens: 8069\u001b[0m\n", "\u001b[32m2026-01-01 17:35:29.145\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.776 | Total tokens: 13352770 | Current cost: $0.000 | Current tokens: 108\u001b[0m\n", "\u001b[32m2026-01-01 17:35:31.107\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.776 | Total tokens: 13353488 | Current cost: $0.000 | Current tokens: 718\u001b[0m\n", "{'name': 'contextualize_answer4593', 'description': 'Task to contextualize_answer4593. Takes validated_answer, question as input. 
Produces final_answer as output.', 'inputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Input parameter validated_answer for contextualize_answer4593', 'required': False}, {'name': 'question', 'type': 'str', 'description': 'Input parameter question for contextualize_answer4593', 'required': False}], 'outputs': [{'name': 'final_answer', 'type': 'str', 'description': 'Output parameter final_answer from contextualize_answer4593', 'required': True}], 'prompt': '```xml\\n\"\"\"\\nTo answer the question accurately, first validate the answer against the context of the question. Ensure that the answer is correct before integrating any additional context. In your thought process, consider how the context relates to the answer and clarify any assumptions made. Provide your reasoning in the field and the final validated answer in the field.\\n\"\"\"\\n```', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:35:34.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.777 | Total tokens: 13361565 | Current cost: $0.001 | Current tokens: 8077\u001b[0m\n", "\u001b[32m2026-01-01 17:35:35.915\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.777 | Total tokens: 13361669 | Current cost: $0.000 | Current tokens: 104\u001b[0m\n", "\u001b[32m2026-01-01 17:35:38.217\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.778 | Total tokens: 13362403 | Current cost: $0.000 | Current tokens: 734\u001b[0m\n", "\u001b[32m2026-01-01 17:35:38.217\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1049\u001b[0m - \u001b[1mEvaluate the workflow at step 2 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:00<00:42, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:01<00:43, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 
[00:02<00:37, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:03<00:37, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:04<00:37, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:05<00:36, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:05<00:37, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:06<00:34, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:07<00:32, 1.25it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:08<00:30, 1.31it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:08<00:28, 1.37it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:09<00:28, 1.35it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:10<00:33, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:11<00:32, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:12<00:35, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:13<00:33, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:15<00:34, 1.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:16<00:32, 1.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:17<00:31, 1.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:17<00:28, 1.05it/s]" 
] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:18<00:28, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:19<00:26, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:20<00:24, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:21<00:24, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:22<00:22, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:23<00:24, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:24<00:23, 1.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:25<00:20, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:26<00:19, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:27<00:18, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:27<00:16, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:28<00:15, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:29<00:14, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:30<00:12, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:31<00:11, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:32<00:13, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
74%|███████▍ | 37/50 [00:33<00:11, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:34<00:13, 1.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:35<00:11, 1.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:36<00:09, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:37<00:08, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:38<00:07, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:39<00:06, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:40<00:05, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:41<00:04, 1.03it/s]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:42<00:03, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:42<00:02, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:43<00:01, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:44<00:00, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:45<00:00, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-01 17:36:23.830\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1052\u001b[0m - \u001b[1mStep 2 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.96}\u001b[0m\n", "randomly update dataset\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:36:27.176\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.784 | Total tokens: 13401100 | Current cost: $0.001 | Current tokens: 8021\u001b[0m\n", "- The workflow lacks 
a validation step after generating the answer, which could lead to incorrect conclusions being drawn without verification.\n", "- There is no explicit handling of cases where the answer might be ambiguous or where data may not support a clear 'Yes' or 'No' response.\n", "- The workflow assumes that all questions can be answered with a binary response without considering the complexity of biological data, which may not always fit this model.\n", "- The repeated occurrence of incorrect predictions and solutions indicates a potential flaw in the underlying model or data processing, suggesting that the model may not be adequately trained for all scenarios presented.\n", "- The control flow does not account for the possibility of conflicting results from different questions, which could lead to inconsistencies in the overall assessment of the data.\n", "\u001b[32m2026-01-01 17:36:28.814\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.784 | Total tokens: 13401791 | Current cost: $0.000 | Current tokens: 691\u001b[0m\n", "```python\n", "steps = [\n", "{'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", "{'name': 'validate_answer2087', 'args': ['answer'], 'outputs': ['validated_answer']},\n", "{'name': 'contextualize_answer4593', 'args': ['validated_answer', 'question'], 'outputs': ['final_answer']}\n", "]\n", "```\n", "Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, BHLHE40 is perturbed and SESN2 expression is measured. Determine whether SESN2 exhibits a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, P4HB is perturbed and RHCE expression is quantified. Does this perturbation result in a significant change in RHCE expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SOCS1 is perturbed and RPS27 expression is measured. Determine whether RPS27 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to TIMM23 and then measure expression of SH3BGRL3. Does this perturbation cause a significant change in SH3BGRL3 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SRP68 and then measure expression of PPCS. Does this perturbation cause a significant change in PPCS expression? 
Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SYVN1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which BHLHE40 is perturbed and SESN2 expression is observed. Does this perturbation lead to a significant difference in SESN2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, CARS is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which AMIGO3 is perturbed and TXNIP expression is observed. Does this perturbation lead to a significant difference in TXNIP expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of HARS, does the expression profile of PHGDH indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SRPRB is perturbed and the expression of PPCS is measured. Determine whether PPCS shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, PDIA6 is perturbed and SNHG12 expression is measured. Determine whether SNHG12 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of MARS, does the expression profile of FAM129A indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. 
For K562 cells subjected to perturbation of COPB1, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, UFM1 is perturbed and the expression of CLCA1 is measured. Does this perturbation cause a significant change in CLCA1 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of TELO2, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of COPZ1, does the expression profile of SH3BGRL3 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, GBF1 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, HSD17B12 is perturbed and FCGR2A expression is quantified. Does this perturbation result in a significant change in FCGR2A expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, TMEM167A is perturbed and the expression of CD52 is measured. Does this perturbation cause a significant change in CD52 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SCYL1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb SAMM50 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. 
For K562 cells subjected to perturbation of ARHGAP22, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, STT3A is perturbed and the expression of FCER1G is measured. Determine whether FCER1G shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SAMM50 and examine the expression of FCGR2A. Does perturbing SAMM50 lead to a significant change in FCGR2A expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, P4HB is perturbed and RHCE expression is measured. Determine whether RHCE exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SAMM50, does the expression profile of FCGR2A indicate a significant change relative to control conditions? 
Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to CARS and then measure expression of FCGR2A. Does this perturbation cause a significant change in FCGR2A expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TIMM23 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, NEDD8 is perturbed and FCGR2A expression is quantified. Does this perturbation result in a significant change in FCGR2A expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of PPWD1 is associated with a significant change in CD52 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. 
In K562 cells, we perturb NEDD8 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DAD1 and examine the expression of CLCA1. Does perturbing DAD1 lead to a significant change in CLCA1 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SLMO2 and then measure expression of PHGDH. Does this perturbation cause a significant change in PHGDH expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SPCS2 is perturbed and the expression of SH3BGRL3 is measured. Determine whether SH3BGRL3 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SEC63 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? 
Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TIMM23 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DDIT3 is perturbed and RPS27 expression is quantified. Does this perturbation result in a significant change in RPS27 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which ATP5B is perturbed and FCER1G expression is observed. Does this perturbation lead to a significant difference in FCER1G expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IDH3A is perturbed and SNHG12 expression is observed. Does this perturbation lead to a significant difference in SNHG12 expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TTI1 is perturbed and the expression of CD52 is measured. Determine whether CD52 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SEC63 is perturbed and CD52 expression is measured. Determine whether CD52 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to UFL1 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, KCTD16 is perturbed and the expression of SNHG12 is measured. Does this perturbation cause a significant change in SNHG12 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. 
In K562 cells, CHERP is perturbed and NPL expression is quantified. Does this perturbation result in a significant change in NPL expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IER3IP1 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, EIF2B2 is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb FARSB and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ZNF326 is associated with a significant change in RGS16 expression compared with unperturbed controls. 
Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, SLMO2 is perturbed and the expression of FCGR2A is measured. Does this perturbation cause a significant change in FCGR2A expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': '\"\"\"\\nUse the context from `{question}` to determine the best answer. If the answer is uncertain or conflicting, indicate this clearly. Ensure that the answer is validated for accuracy before proceeding to the next step. Provide your final answer in a clear format, without extra commentary or reasoning.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. 
Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:36:31.136\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.785 | Total tokens: 13406950 | Current cost: $0.001 | Current tokens: 5159\u001b[0m\n", "\u001b[32m2026-01-01 17:36:32.419\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.785 | Total tokens: 13407076 | Current cost: $0.000 | Current tokens: 126\u001b[0m\n", "\u001b[32m2026-01-01 17:36:33.852\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.785 | Total tokens: 13407714 | Current cost: $0.000 | Current tokens: 638\u001b[0m\n", "{'name': 'validate_answer2087', 'description': 'Task to validate_answer2087. Takes answer as input. Produces validated_answer as output.', 'inputs': [{'name': 'answer', 'type': 'str', 'description': 'Input parameter answer for validate_answer2087', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_answer2087', 'required': True}], 'prompt': '\"\"\"\\nThink step by step to answer the question based on the context provided in {question}. First, validate the context to ensure accuracy and relevance. If the answer is uncertain or ambiguous, clearly state that. After validation, generate a straightforward answer that directly addresses {question}, considering any nuances in the data. 
Format your output in XML, using to explain your reasoning and for the final response. Ensure that the answer reflects significant expression changes accurately and is free from oversimplification.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:36:36.251\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.786 | Total tokens: 13412923 | Current cost: $0.001 | Current tokens: 5209\u001b[0m\n", "\u001b[32m2026-01-01 17:36:37.367\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.786 | Total tokens: 13413027 | Current cost: $0.000 | Current tokens: 104\u001b[0m\n", "\u001b[32m2026-01-01 17:36:38.973\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.786 | Total tokens: 13413817 | Current cost: $0.000 | Current tokens: 790\u001b[0m\n", "{'name': 'contextualize_answer4593', 'description': 'Task to contextualize_answer4593. Takes validated_answer, question as input. 
Produces final_answer as output.', 'inputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Input parameter validated_answer for contextualize_answer4593', 'required': False}, {'name': 'question', 'type': 'str', 'description': 'Input parameter question for contextualize_answer4593', 'required': False}], 'outputs': [{'name': 'final_answer', 'type': 'str', 'description': 'Output parameter final_answer from contextualize_answer4593', 'required': True}], 'prompt': '```xml\\n\"\"\"\\nTo answer the question accurately, first generate an answer based on the provided {question}. Validate the generated answer by ensuring it aligns with the context of the {question} and meets the criteria for a \"validated answer,\" which includes correctness and relevance. In your thought process, consider how the context relates to the answer and clarify any assumptions made. If the generated answer does not significantly change the context or is incorrect, indicate this in your validation. Provide your reasoning in the field and the final validated answer in the field.\\n\"\"\"\\n```', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:36:41.866\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.787 | Total tokens: 13419067 | Current cost: $0.001 | Current tokens: 5250\u001b[0m\n", "\u001b[32m2026-01-01 17:36:43.377\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.787 | Total tokens: 13419172 | Current cost: $0.000 | Current tokens: 105\u001b[0m\n", "\u001b[32m2026-01-01 17:36:45.185\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.787 | Total tokens: 13420052 | Current cost: $0.000 | Current tokens: 880\u001b[0m\n", "\u001b[32m2026-01-01 17:36:45.185\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1049\u001b[0m - \u001b[1mEvaluate the workflow at step 3 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:00<00:37, 1.30it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:01<00:40, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 
[00:02<00:49, 1.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:03<00:43, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:04<00:47, 1.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:05<00:41, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:06<00:44, 1.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:07<00:41, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:08<00:40, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:10<00:42, 1.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:10<00:39, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:11<00:37, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:12<00:34, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:13<00:31, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:14<00:32, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:15<00:29, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:16<00:29, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:17<00:28, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:18<00:30, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:19<00:32, 1.09s/it]" 
] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:20<00:29, 1.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:21<00:28, 1.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:22<00:30, 1.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:23<00:26, 1.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:24<00:25, 1.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:25<00:23, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:26<00:21, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:27<00:22, 1.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:28<00:19, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:29<00:20, 1.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:30<00:19, 1.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:31<00:17, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:32<00:15, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:32<00:14, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:33<00:13, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:34<00:13, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
74%|███████▍ | 37/50 [00:35<00:11, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:36<00:10, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:37<00:09, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:38<00:09, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:39<00:08, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:40<00:07, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:41<00:06, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:42<00:05, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:42<00:04, 1.13it/s]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:44<00:03, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:45<00:03, 1.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:46<00:01, 1.00it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:47<00:00, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:47<00:00, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-01 17:37:33.015\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1052\u001b[0m - \u001b[1mStep 3 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.98}\u001b[0m\n", "randomly update dataset\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:37:36.282\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.793 | Total tokens: 13458750 | Current cost: $0.001 | Current tokens: 8018\u001b[0m\n", "- The workflow lacks 
a validation step after generating the answer, which could lead to incorrect outputs being used in subsequent steps.\n", "- There is no error handling or fallback mechanism in the workflow to address cases where the validation fails or the answer is ambiguous.\n", "- The workflow assumes that all questions can be answered with a binary response ('Yes' or 'No') without considering the complexity or nuances of the data being analyzed.\n", "- The execution history shows multiple instances where the solution is marked as incorrect, indicating potential flaws in the answer generation or validation processes that are not accounted for in the workflow.\n", "- The prompts and intermediate steps do not provide sufficient context or criteria for determining what constitutes a \"significant change,\" leading to potential ambiguity in the answers.\n", "\u001b[32m2026-01-01 17:37:37.536\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.793 | Total tokens: 13459438 | Current cost: $0.000 | Current tokens: 688\u001b[0m\n", "```python\n", "steps = [\n", "{'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", "{'name': 'validate_answer2087', 'args': ['answer'], 'outputs': ['validated_answer']},\n", "{'name': 'contextualize_answer4593', 'args': ['validated_answer', 'question'], 'outputs': ['final_answer']}\n", "]\n", "```\n", "Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, KCTD16 is perturbed and the expression of SNHG12 is measured. Does this perturbation cause a significant change in SNHG12 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. 
In K562 cells, GBF1 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of COPB1, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, PDIA6 is perturbed and SNHG12 expression is measured. Determine whether SNHG12 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which AMIGO3 is perturbed and TXNIP expression is observed. Does this perturbation lead to a significant difference in TXNIP expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, ARHGAP22 is perturbed and the expression of SH3BGRL3 is measured. Does this perturbation cause a significant change in SH3BGRL3 expression? 
Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DDIT3 and examine the expression of RGS16. Does perturbing DDIT3 lead to a significant change in RGS16 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, CARS is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to MRGBP and then measure expression of RPS27. Does this perturbation cause a significant change in RPS27 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SRP68 and then measure expression of PPCS. Does this perturbation cause a significant change in PPCS expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. 
For experiments carried out in K562 cells, we perturb SAMM50 and examine the expression of FCGR2A. Does perturbing SAMM50 lead to a significant change in FCGR2A expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SLMO2 and then measure expression of PHGDH. Does this perturbation cause a significant change in PHGDH expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of ARHGAP22, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which SRPRB is perturbed and PPCS expression is observed. Does this perturbation lead to a significant difference in PPCS expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DNAJC19 is perturbed and PHGDH expression is quantified. Does this perturbation result in a significant change in PHGDH expression compared with control cells? 
Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, ZNF326 is perturbed and ZNF326 expression is measured. Determine whether ZNF326 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of SEL1L is associated with a significant change in TXNIP expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb NEDD8 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to CARS and then measure expression of FCGR2A. Does this perturbation cause a significant change in FCGR2A expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. 
For experiments performed in K562 cells, SEC63 is perturbed and CD52 expression is measured. Determine whether CD52 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, UFM1 is perturbed and the expression of CLCA1 is measured. Does this perturbation cause a significant change in CLCA1 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TIMM23 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to DERL2 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TTI1 is perturbed and the expression of CD52 is measured. Determine whether CD52 shows a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb DAD1 and monitor SNHG12 expression. Decide whether this perturbation leads to a significant alteration in SNHG12 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TTI1 is perturbed and the expression of RHCE is measured. Determine whether RHCE shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SAMM50, does the expression profile of FCGR2A indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SRP68 is perturbed and FCER1G expression is quantified. Does this perturbation result in a significant change in FCER1G expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. 
For K562 cells, assess whether perturbation of ZNF326 is associated with a significant change in RGS16 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to UFL1 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, NEDD8 is perturbed and FCGR2A expression is quantified. Does this perturbation result in a significant change in FCGR2A expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TIMM23 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ASCC3 is associated with a significant change in SH3BGRL3 expression compared with unperturbed controls. 
Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of TELO2, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SYVN1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, P4HB is perturbed and RHCE expression is quantified. Does this perturbation result in a significant change in RHCE expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PDIA6 is perturbed and YTHDF2 expression is observed. Does this perturbation lead to a significant difference in YTHDF2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. 
In K562 cells, we perturb FARSB and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which BHLHE40 is perturbed and SESN2 expression is observed. Does this perturbation lead to a significant difference in SESN2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SEC63 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, IER3IP1 is perturbed and CLCA1 expression is measured. Determine whether CLCA1 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb IER3IP1 and monitor FCER1G expression. Decide whether this perturbation leads to a significant alteration in FCER1G expression. 
Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which ATP5B is perturbed and FCER1G expression is observed. Does this perturbation lead to a significant difference in FCER1G expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, EIF2B2 is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TMED2 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb HYOU1 and examine the expression of SNHG12. Does perturbing HYOU1 lead to a significant change in SNHG12 expression? 
Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SCYL1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SPCS3, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IER3IP1 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of HARS, does the expression profile of PHGDH indicate a significant change relative to control conditions? 
Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': '\"\"\"\\nUse the context from `{question}` to determine the best answer. If the answer is uncertain or conflicting, clearly indicate this. Ensure that the answer is validated against reliable sources for accuracy before proceeding. Provide your final answer in a clear format, without extra commentary or reasoning.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. 
Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:37:40.994\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.794 | Total tokens: 13464613 | Current cost: $0.001 | Current tokens: 5175\u001b[0m\n", "\u001b[32m2026-01-01 17:37:42.034\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.794 | Total tokens: 13464718 | Current cost: $0.000 | Current tokens: 105\u001b[0m\n", "\u001b[32m2026-01-01 17:37:43.084\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.794 | Total tokens: 13465404 | Current cost: $0.000 | Current tokens: 686\u001b[0m\n", "{'name': 'validate_answer2087', 'description': 'Task to validate_answer2087. Takes answer as input. Produces validated_answer as output.', 'inputs': [{'name': 'answer', 'type': 'str', 'description': 'Input parameter answer for validate_answer2087', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_answer2087', 'required': True}], 'prompt': '\"\"\"\\nThink step by step to answer the question based on the context provided in {question}. First, validate the context to ensure accuracy and relevance. If the answer is uncertain or ambiguous, clearly state that uncertainty. After validation, generate a straightforward answer that directly addresses {question}, considering any relevant nuances. 
Format your output in XML, using to explain your reasoning and for the final response. Ensure that the answer is concise and reflects any significant changes accurately, avoiding oversimplification.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:37:46.637\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.795 | Total tokens: 13470622 | Current cost: $0.001 | Current tokens: 5218\u001b[0m\n", "\u001b[32m2026-01-01 17:37:48.165\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.795 | Total tokens: 13470726 | Current cost: $0.000 | Current tokens: 104\u001b[0m\n", "\u001b[32m2026-01-01 17:37:49.935\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.795 | Total tokens: 13471645 | Current cost: $0.000 | Current tokens: 919\u001b[0m\n", "{'name': 'contextualize_answer4593', 'description': 'Task to contextualize_answer4593. Takes validated_answer, question as input. 
Produces final_answer as output.', 'inputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Input parameter validated_answer for contextualize_answer4593', 'required': False}, {'name': 'question', 'type': 'str', 'description': 'Input parameter question for contextualize_answer4593', 'required': False}], 'outputs': [{'name': 'final_answer', 'type': 'str', 'description': 'Output parameter final_answer from contextualize_answer4593', 'required': True}], 'prompt': '```xml\\n\"\"\"\\nTo answer the question accurately, first generate an answer based on the provided {question}. Validate the generated answer by ensuring it aligns with the context of the {question} and meets the criteria for a \"validated answer,\" which includes correctness and relevance. If the generated answer does not significantly change the context or is incorrect, indicate this in your validation. In your thought process, clarify any assumptions made and how the context relates to the answer. Ensure that the final answer is presented in the format \\'Final Answer: Yes\\' or \\'Final Answer: No\\'. Provide your reasoning in the field and the final validated answer in the field.\\n\"\"\"\\n```', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:37:52.694\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.796 | Total tokens: 13476869 | Current cost: $0.001 | Current tokens: 5224\u001b[0m\n", "\u001b[32m2026-01-01 17:37:53.590\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.796 | Total tokens: 13476965 | Current cost: $0.000 | Current tokens: 96\u001b[0m\n", "\u001b[32m2026-01-01 17:37:55.195\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.797 | Total tokens: 13477921 | Current cost: $0.000 | Current tokens: 956\u001b[0m\n", "\u001b[32m2026-01-01 17:37:55.195\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1049\u001b[0m - \u001b[1mEvaluate the workflow at step 4 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:00<00:38, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:01<00:42, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 
[00:02<00:39, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:03<00:37, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:04<00:35, 1.28it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:04<00:34, 1.26it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:05<00:32, 1.32it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:07<00:49, 1.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:08<00:42, 1.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:10<00:51, 1.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:11<00:49, 1.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:12<00:41, 1.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:12<00:36, 1.00it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:13<00:34, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:14<00:33, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:15<00:34, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:16<00:32, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:17<00:32, 1.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:18<00:30, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:19<00:28, 1.06it/s]" 
] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:20<00:25, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:21<00:25, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:22<00:23, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:23<00:22, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:23<00:21, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:24<00:20, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:25<00:19, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:26<00:19, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:27<00:17, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:28<00:17, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:29<00:17, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:29<00:15, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:30<00:13, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:31<00:14, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:32<00:13, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:33<00:11, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
74%|███████▍ | 37/50 [00:34<00:11, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:35<00:10, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:35<00:09, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:36<00:08, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:37<00:08, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:39<00:08, 1.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:40<00:07, 1.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:40<00:05, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:41<00:04, 1.12it/s]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:42<00:03, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:43<00:02, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:43<00:01, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:44<00:00, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:45<00:00, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-01 17:38:41.120\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1052\u001b[0m - \u001b[1mStep 4 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.92}\u001b[0m\n", "randomly update dataset\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:38:44.435\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.803 | Total tokens: 13516639 | Current cost: $0.001 | Current tokens: 8012\u001b[0m\n", "- The workflow lacks 
a validation step after generating the answer, which could lead to incorrect outputs being used in subsequent steps.\n", "- There is no explicit handling of cases where the answer might be ambiguous or not strictly 'Yes' or 'No', potentially leading to misinterpretation of results.\n", "- The workflow assumes that all questions can be answered with a binary response without considering the complexity of biological data, which may not always fit this model.\n", "- The execution history shows multiple instances where the predicted answers were incorrect, indicating a potential flaw in the answer generation process or the underlying data interpretation.\n", "- The workflow does not include any mechanism for addressing or correcting errors in predictions, leading to repeated inaccuracies in the final answers.\n", "\u001b[32m2026-01-01 17:38:45.873\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.803 | Total tokens: 13517321 | Current cost: $0.000 | Current tokens: 682\u001b[0m\n", "```python\n", "steps = [\n", "{'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", "{'name': 'validate_answer2087', 'args': ['answer'], 'outputs': ['validated_answer']},\n", "{'name': 'contextualize_answer4593', 'args': ['validated_answer', 'question'], 'outputs': ['final_answer']}\n", "]\n", "```\n", "Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, HSD17B12 is perturbed and FCGR2A expression is quantified. Does this perturbation result in a significant change in FCGR2A expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. 
In K562 cells, we perturb SAMM50 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, GBF1 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which BHLHE40 is perturbed and SESN2 expression is observed. Does this perturbation lead to a significant difference in SESN2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, TMEM167A is perturbed and the expression of CD52 is measured. Does this perturbation cause a significant change in CD52 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of TELO2, does the expression profile of CD52 indicate a significant change relative to control conditions? 
Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SOCS1 is perturbed and RPS27 expression is measured. Determine whether RPS27 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SLC35B1 and examine the expression of RHCE. Does perturbing SLC35B1 lead to a significant change in RHCE expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SPCS3, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of SEL1L is associated with a significant change in TXNIP expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. 
In K562 cells, we introduce a perturbation to DERL2 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SEC63 is perturbed and CD52 expression is measured. Determine whether CD52 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which SRPRB is perturbed and PPCS expression is observed. Does this perturbation lead to a significant difference in PPCS expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SRP68 is perturbed and FCER1G expression is quantified. Does this perturbation result in a significant change in FCER1G expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, SLC35B1 is perturbed and the expression of FCER1G is measured. Does this perturbation cause a significant change in FCER1G expression? 
Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IER3IP1 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, CHERP is perturbed and NPL expression is quantified. Does this perturbation result in a significant change in NPL expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SLMO2 and then measure expression of PHGDH. Does this perturbation cause a significant change in PHGDH expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PPWD1 is perturbed and CLCA1 expression is observed. Does this perturbation lead to a significant difference in CLCA1 expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SRP72 and examine the expression of RPS27. Does perturbing SRP72 lead to a significant change in RPS27 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, ZNF326 is perturbed and ZNF326 expression is measured. Determine whether ZNF326 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb FARSB and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, STT3A is perturbed and the expression of FCER1G is measured. Determine whether FCER1G shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. 
In K562 cells, IARS2 is perturbed and the expression of PHGDH is measured. Determine whether PHGDH shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb MRGBP and examine the expression of RPS27. Does perturbing MRGBP lead to a significant change in RPS27 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SAMM50 and examine the expression of FCGR2A. Does perturbing SAMM50 lead to a significant change in FCGR2A expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, ARHGAP22 is perturbed and the expression of SH3BGRL3 is measured. Does this perturbation cause a significant change in SH3BGRL3 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TIMM23 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, PDIA6 is perturbed and SNHG12 expression is measured. Determine whether SNHG12 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TMED2 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb HYOU1 and examine the expression of SNHG12. Does perturbing HYOU1 lead to a significant change in SNHG12 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of COPB1, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. 
In K562 cells, TTI1 is perturbed and the expression of CD52 is measured. Determine whether CD52 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb NEDD8 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ASCC3 is associated with a significant change in SH3BGRL3 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IDH3A is perturbed and SNHG12 expression is observed. Does this perturbation lead to a significant difference in SNHG12 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DAD1 and examine the expression of CLCA1. Does perturbing DAD1 lead to a significant change in CLCA1 expression? 
Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, EIF2B2 is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SAMM50, does the expression profile of FCGR2A indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PDIA6 is perturbed and YTHDF2 expression is observed. Does this perturbation lead to a significant difference in YTHDF2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, KCTD16 is perturbed and the expression of SNHG12 is measured. Does this perturbation cause a significant change in SNHG12 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. 
In K562 cells, we introduce a perturbation to SEC63 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SRP68 and then measure expression of PPCS. Does this perturbation cause a significant change in PPCS expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DNAJC19 is perturbed and PHGDH expression is quantified. Does this perturbation result in a significant change in PHGDH expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb TTI1 and examine the expression of RHCE. Does perturbing TTI1 lead to a significant change in RHCE expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of GMPPB, does the expression profile of RHCE indicate a significant change relative to control conditions? 
Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ZNF326 is associated with a significant change in RGS16 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which ATP5B is perturbed and FCER1G expression is observed. Does this perturbation lead to a significant difference in FCER1G expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DDIT3 is perturbed and RPS27 expression is quantified. Does this perturbation result in a significant change in RPS27 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, P4HB is perturbed and RHCE expression is measured. Determine whether RHCE exhibits a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': '\"\"\"\\nUse the context from `{question}` to determine the best answer. If the answer is uncertain or conflicting, clearly indicate this. Ensure that the answer is validated against reliable sources for accuracy before proceeding. Provide your final answer in a clear format, without extra commentary or reasoning.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. 
Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:38:48.919\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.804 | Total tokens: 13522532 | Current cost: $0.001 | Current tokens: 5211\u001b[0m\n", "\u001b[32m2026-01-01 17:38:50.383\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.804 | Total tokens: 13522651 | Current cost: $0.000 | Current tokens: 119\u001b[0m\n", "\u001b[32m2026-01-01 17:38:51.275\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.804 | Total tokens: 13523402 | Current cost: $0.000 | Current tokens: 751\u001b[0m\n", "{'name': 'validate_answer2087', 'description': 'Task to validate_answer2087. Takes answer as input. Produces validated_answer as output.', 'inputs': [{'name': 'answer', 'type': 'str', 'description': 'Input parameter answer for validate_answer2087', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_answer2087', 'required': True}], 'prompt': '\"\"\"\\nThink step by step to answer the question based on the context provided in {question}. First, validate the context to ensure accuracy and relevance. If the answer is uncertain or ambiguous, clearly state that uncertainty. After validation, generate a straightforward answer that directly addresses {question}, considering any relevant nuances. 
Format your output in XML, using to explain your reasoning and for the final response. Ensure that the answer is concise and reflects any significant changes accurately, avoiding oversimplification.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:38:54.216\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.805 | Total tokens: 13528675 | Current cost: $0.001 | Current tokens: 5273\u001b[0m\n", "\u001b[32m2026-01-01 17:38:55.888\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.805 | Total tokens: 13528783 | Current cost: $0.000 | Current tokens: 108\u001b[0m\n", "\u001b[32m2026-01-01 17:38:57.879\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.805 | Total tokens: 13529836 | Current cost: $0.000 | Current tokens: 1053\u001b[0m\n", "{'name': 'contextualize_answer4593', 'description': 'Task to contextualize_answer4593. Takes validated_answer, question as input. 
Produces final_answer as output.', 'inputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Input parameter validated_answer for contextualize_answer4593', 'required': False}, {'name': 'question', 'type': 'str', 'description': 'Input parameter question for contextualize_answer4593', 'required': False}], 'outputs': [{'name': 'final_answer', 'type': 'str', 'description': 'Output parameter final_answer from contextualize_answer4593', 'required': True}], 'prompt': '```xml\\n\"\"\"\\nTo answer the question accurately, first generate an answer based on the provided {question}. Validate the generated answer by ensuring it aligns with the context of the {question} and meets the criteria for a \"validated answer,\" which includes correctness and relevance. If the generated answer does not significantly change the context or is incorrect, indicate this in your validation. In your thought process, clarify any assumptions made and how the context relates to the answer. Ensure that the final answer is presented in the format \\'Final Answer: Yes\\' or \\'Final Answer: No\\'. Provide your reasoning in the field and the final validated answer in the field.\\n\"\"\"\\n```', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:39:00.750\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.806 | Total tokens: 13535101 | Current cost: $0.001 | Current tokens: 5265\u001b[0m\n", "\u001b[32m2026-01-01 17:39:02.117\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.806 | Total tokens: 13535196 | Current cost: $0.000 | Current tokens: 95\u001b[0m\n", "\u001b[32m2026-01-01 17:39:04.381\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.806 | Total tokens: 13536269 | Current cost: $0.000 | Current tokens: 1073\u001b[0m\n", "\u001b[32m2026-01-01 17:39:04.381\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1049\u001b[0m - \u001b[1mEvaluate the workflow at step 5 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:00<00:41, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:01<00:34, 1.37it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 
[00:02<00:32, 1.43it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:03<00:35, 1.28it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:04<00:40, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:05<00:39, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:06<00:41, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:06<00:36, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:07<00:36, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:08<00:34, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:09<00:31, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:10<00:31, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:10<00:28, 1.28it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:11<00:28, 1.28it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:12<00:26, 1.30it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:13<00:25, 1.34it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:13<00:23, 1.41it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:14<00:26, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:16<00:30, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:16<00:27, 1.07it/s]" 
] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:17<00:28, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:18<00:27, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:19<00:25, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:20<00:22, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:21<00:21, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:22<00:20, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:23<00:20, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:23<00:19, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:24<00:18, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:25<00:17, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:26<00:16, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:27<00:16, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:28<00:15, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:29<00:15, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:30<00:14, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:31<00:13, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
74%|███████▍ | 37/50 [00:32<00:11, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:33<00:11, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:34<00:11, 1.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:35<00:09, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:36<00:08, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:37<00:08, 1.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:38<00:07, 1.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:39<00:05, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:40<00:04, 1.06it/s]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:41<00:03, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:41<00:02, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:42<00:01, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:43<00:00, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:44<00:00, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-01 17:39:49.122\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1052\u001b[0m - \u001b[1mStep 5 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.98}\u001b[0m\n", "randomly update dataset\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:39:52.288\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.812 | Total tokens: 13574984 | Current cost: $0.001 | Current tokens: 8007\u001b[0m\n", "- The workflow lacks 
a validation step to ensure that the generated answer aligns with the specific format required ('Final Answer: Yes' or 'Final Answer: No'), leading to potential inconsistencies in output.\n", "- There are multiple instances of incorrect computation results, as indicated by the execution history, suggesting that the validation step does not adequately catch errors in the answer generation process.\n", "- The workflow does not account for the possibility of ambiguous or misleading questions, which could lead to incorrect answers being generated without proper context or clarification.\n", "- The ordering of steps does not allow for iterative refinement; if an answer is incorrect, there is no mechanism to revisit the question or the generated answer before finalization.\n", "\u001b[32m2026-01-01 17:39:54.231\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.812 | Total tokens: 13575661 | Current cost: $0.000 | Current tokens: 677\u001b[0m\n", "```python\n", "steps = [\n", "{'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", "{'name': 'validate_answer2087', 'args': ['answer'], 'outputs': ['validated_answer']},\n", "{'name': 'contextualize_answer4593', 'args': ['validated_answer', 'question'], 'outputs': ['final_answer']}\n", "]\n", "```\n", "Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of TELO2, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. 
For K562 cells subjected to perturbation of GMPPB, does the expression profile of RHCE indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, ZNF326 is perturbed and ZNF326 expression is measured. Determine whether ZNF326 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb DAD1 and monitor SNHG12 expression. Decide whether this perturbation leads to a significant alteration in SNHG12 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SAMM50, does the expression profile of FCGR2A indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb HYOU1 and examine the expression of SNHG12. Does perturbing HYOU1 lead to a significant change in SNHG12 expression? 
Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, SLC35B1 is perturbed and the expression of FCER1G is measured. Does this perturbation cause a significant change in FCER1G expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, P4HB is perturbed and RHCE expression is quantified. Does this perturbation result in a significant change in RHCE expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, EIF2B2 is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb PSMD4 and monitor PSMD4 expression. Decide whether this perturbation leads to a significant alteration in PSMD4 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. 
For experiments performed in K562 cells, SEC63 is perturbed and CD52 expression is measured. Determine whether CD52 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TMED2 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TTI1 is perturbed and the expression of CD52 is measured. Determine whether CD52 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SYVN1 is perturbed and YTHDF2 expression is quantified. Does this perturbation result in a significant change in YTHDF2 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DNAJC19 is perturbed and PHGDH expression is quantified. Does this perturbation result in a significant change in PHGDH expression compared with control cells? 
Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SPCS3, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SRPRB is perturbed and the expression of PPCS is measured. Determine whether PPCS shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, CHERP is perturbed and NPL expression is quantified. Does this perturbation result in a significant change in NPL expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SLC35B1 and examine the expression of RHCE. Does perturbing SLC35B1 lead to a significant change in RHCE expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. 
In K562 cells, we perturb NEDD8 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which ATP5B is perturbed and FCER1G expression is observed. Does this perturbation lead to a significant difference in FCER1G expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IER3IP1 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, IARS2 is perturbed and the expression of PHGDH is measured. Determine whether PHGDH shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb IER3IP1 and monitor FCER1G expression. Decide whether this perturbation leads to a significant alteration in FCER1G expression. 
Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb SAMM50 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, HSD17B12 is perturbed and FCGR2A expression is quantified. Does this perturbation result in a significant change in FCGR2A expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SYVN1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DAD1 and examine the expression of CLCA1. Does perturbing DAD1 lead to a significant change in CLCA1 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. 
Consider data from K562 cells in which AMIGO3 is perturbed and TXNIP expression is observed. Does this perturbation lead to a significant difference in TXNIP expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to DERL2 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SLMO2 and then measure expression of PHGDH. Does this perturbation cause a significant change in PHGDH expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PPWD1 is perturbed and CLCA1 expression is observed. Does this perturbation lead to a significant difference in CLCA1 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to CARS and then measure expression of FCGR2A. Does this perturbation cause a significant change in FCGR2A expression? 
Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to UFL1 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, GBF1 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SCYL1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TTI1 is perturbed and the expression of RHCE is measured. Determine whether RHCE shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. 
In K562 cells, TIMM23 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, KCTD16 is perturbed and the expression of SNHG12 is measured. Does this perturbation cause a significant change in SNHG12 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which SRPRB is perturbed and PPCS expression is observed. Does this perturbation lead to a significant difference in PPCS expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DDIT3 and examine the expression of RGS16. Does perturbing DDIT3 lead to a significant change in RGS16 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ASCC3 is associated with a significant change in SH3BGRL3 expression compared with unperturbed controls. 
Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, P4HB is perturbed and RHCE expression is measured. Determine whether RHCE exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DDIT3 is perturbed and RPS27 expression is quantified. Does this perturbation result in a significant change in RPS27 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, CARS is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to TIMM23 and then measure expression of SH3BGRL3. Does this perturbation cause a significant change in SH3BGRL3 expression? 
Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which BHLHE40 is perturbed and SESN2 expression is observed. Does this perturbation lead to a significant difference in SESN2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IDH3A is perturbed and SNHG12 expression is observed. Does this perturbation lead to a significant difference in SNHG12 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PDIA6 is perturbed and YTHDF2 expression is observed. Does this perturbation lead to a significant difference in YTHDF2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SRP68 and then measure expression of PPCS. Does this perturbation cause a significant change in PPCS expression? 
Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': '\"\"\"\\nUse the context from `{question}` to determine the best answer. If the answer is uncertain or conflicting, clearly indicate this. Ensure that the answer is validated against reliable sources for accuracy before proceeding. Provide your final answer in a clear format, without extra commentary or reasoning.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. 
Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:39:57.332\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.813 | Total tokens: 13580861 | Current cost: $0.001 | Current tokens: 5200\u001b[0m\n", "\u001b[32m2026-01-01 17:39:58.882\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.813 | Total tokens: 13580952 | Current cost: $0.000 | Current tokens: 91\u001b[0m\n", "\u001b[32m2026-01-01 17:40:00.203\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.813 | Total tokens: 13581740 | Current cost: $0.000 | Current tokens: 788\u001b[0m\n", "{'name': 'validate_answer2087', 'description': 'Task to validate_answer2087. Takes answer as input. Produces validated_answer as output.', 'inputs': [{'name': 'answer', 'type': 'str', 'description': 'Input parameter answer for validate_answer2087', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_answer2087', 'required': True}], 'prompt': '\"\"\"\\nThink step by step to answer the question based on the context provided in {question}. First, validate the context to ensure accuracy and relevance. If the answer is uncertain or ambiguous, clearly state that uncertainty. After validation, generate a straightforward answer that directly addresses {question}, considering any relevant nuances. 
Format your output in XML, using to explain your reasoning and for the final response. Ensure that the answer is concise and reflects any significant changes accurately, avoiding oversimplification.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:40:02.474\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.814 | Total tokens: 13586971 | Current cost: $0.001 | Current tokens: 5231\u001b[0m\n", "\u001b[32m2026-01-01 17:40:03.657\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.814 | Total tokens: 13587075 | Current cost: $0.000 | Current tokens: 104\u001b[0m\n", "\u001b[32m2026-01-01 17:40:05.494\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.815 | Total tokens: 13588208 | Current cost: $0.000 | Current tokens: 1133\u001b[0m\n", "{'name': 'contextualize_answer4593', 'description': 'Task to contextualize_answer4593. Takes validated_answer, question as input. 
Produces final_answer as output.', 'inputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Input parameter validated_answer for contextualize_answer4593', 'required': False}, {'name': 'question', 'type': 'str', 'description': 'Input parameter question for contextualize_answer4593', 'required': False}], 'outputs': [{'name': 'final_answer', 'type': 'str', 'description': 'Output parameter final_answer from contextualize_answer4593', 'required': True}], 'prompt': '```xml\\n\"\"\"\\nTo answer the question accurately, first generate an answer based on the provided {question}. Validate the generated answer by ensuring it aligns with the context of the {question} and meets the criteria for a \"validated answer,\" which includes correctness and relevance. If the generated answer does not significantly change the context or is incorrect, indicate this in your validation. In your thought process, clarify any assumptions made and how the context relates to the answer. Ensure that the final answer is presented in the format \\'Final Answer: Yes\\' or \\'Final Answer: No\\'. Provide your reasoning in the field and the final validated answer in the field.\\n\"\"\"\\n```', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:40:08.211\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.815 | Total tokens: 13593456 | Current cost: $0.001 | Current tokens: 5248\u001b[0m\n", "\u001b[32m2026-01-01 17:40:09.733\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.815 | Total tokens: 13593560 | Current cost: $0.000 | Current tokens: 104\u001b[0m\n", "\u001b[32m2026-01-01 17:40:11.502\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.816 | Total tokens: 13594748 | Current cost: $0.000 | Current tokens: 1188\u001b[0m\n", "\u001b[32m2026-01-01 17:40:11.503\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1049\u001b[0m - \u001b[1mEvaluate the workflow at step 6 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:00<00:32, 1.49it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:01<00:32, 1.46it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 
[00:02<00:38, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:03<00:49, 1.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:04<00:44, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:05<00:42, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:06<00:40, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:07<00:38, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:08<00:36, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:09<00:37, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:10<00:35, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:10<00:32, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:11<00:35, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:13<00:36, 1.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:13<00:32, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:14<00:30, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:15<00:28, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:16<00:27, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:17<00:28, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:18<00:27, 1.08it/s]" 
] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:19<00:25, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:20<00:25, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:21<00:25, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:22<00:30, 1.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:23<00:26, 1.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:24<00:24, 1.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:25<00:24, 1.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:26<00:22, 1.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:27<00:19, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:27<00:16, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:28<00:16, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:29<00:16, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:30<00:14, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:31<00:13, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:32<00:12, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:33<00:12, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
74%|███████▍ | 37/50 [00:34<00:12, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:35<00:10, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:35<00:09, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:37<00:10, 1.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:38<00:08, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:39<00:07, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:40<00:07, 1.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:41<00:05, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:42<00:04, 1.01it/s]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:43<00:04, 1.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:44<00:03, 1.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:45<00:02, 1.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:46<00:01, 1.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:47<00:00, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-01 17:40:58.619\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1052\u001b[0m - \u001b[1mStep 6 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.92}\u001b[0m\n", "randomly update dataset\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:41:01.748\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.822 | Total tokens: 13633407 | Current cost: $0.001 | Current tokens: 8002\u001b[0m\n", "- The workflow lacks 
a validation step after generating the answer, which could lead to incorrect outputs being accepted without verification.\n", "- There is an inconsistency in the execution history where multiple questions resulted in incorrect solutions despite the predictions being correct, indicating potential flaws in the answer generation or validation processes.\n", "- The prompts for questions are overly repetitive and do not provide sufficient context or variation, which may lead to ambiguity in interpretation.\n", "- The workflow does not account for potential edge cases or exceptions in the data, which could result in misleading conclusions.\n", "- The control flow does not include mechanisms for handling errors or discrepancies in the predictions versus solutions, leading to unaddressed inaccuracies.\n", "\u001b[32m2026-01-01 17:41:03.220\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.822 | Total tokens: 13634079 | Current cost: $0.000 | Current tokens: 672\u001b[0m\n", "```python\n", "steps = [\n", "{'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", "{'name': 'validate_answer2087', 'args': ['answer'], 'outputs': ['validated_answer']},\n", "{'name': 'contextualize_answer4593', 'args': ['validated_answer', 'question'], 'outputs': ['final_answer']}\n", "]\n", "```\n", "Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SLMO2 and then measure expression of PHGDH. Does this perturbation cause a significant change in PHGDH expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb IER3IP1 and monitor FCER1G expression. 
Decide whether this perturbation leads to a significant alteration in FCER1G expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, SLMO2 is perturbed and the expression of FCGR2A is measured. Does this perturbation cause a significant change in FCGR2A expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, GBF1 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TTI1 is perturbed and the expression of RHCE is measured. Determine whether RHCE shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which SRPRB is perturbed and PPCS expression is observed. Does this perturbation lead to a significant difference in PPCS expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, EIF2B2 is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PPWD1 is perturbed and CLCA1 expression is observed. Does this perturbation lead to a significant difference in CLCA1 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, SLC35B1 is perturbed and the expression of FCER1G is measured. Does this perturbation cause a significant change in FCER1G expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to DERL2 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. 
In K562 cells, we perturb PSMD4 and monitor PSMD4 expression. Decide whether this perturbation leads to a significant alteration in PSMD4 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SRP68 is perturbed and FCER1G expression is quantified. Does this perturbation result in a significant change in FCER1G expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of ARHGAP22, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TMED2 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of HARS, does the expression profile of PHGDH indicate a significant change relative to control conditions? 
Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SYVN1 is perturbed and YTHDF2 expression is quantified. Does this perturbation result in a significant change in YTHDF2 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SCYL1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb DAD1 and monitor SNHG12 expression. Decide whether this perturbation leads to a significant alteration in SNHG12 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DDIT3 is perturbed and RPS27 expression is quantified. Does this perturbation result in a significant change in RPS27 expression compared with control cells? 
Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, NEDD8 is perturbed and FCGR2A expression is quantified. Does this perturbation result in a significant change in FCGR2A expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of SEL1L is associated with a significant change in TXNIP expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb TTI1 and examine the expression of RHCE. Does perturbing TTI1 lead to a significant change in RHCE expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to UFL1 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. 
For K562 cells subjected to perturbation of IER3IP1, does the expression profile of FCER1G indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, IARS2 is perturbed and the expression of PHGDH is measured. Determine whether PHGDH shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, STT3A is perturbed and the expression of FCER1G is measured. Determine whether FCER1G shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SRPRB is perturbed and the expression of PPCS is measured. Determine whether PPCS shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SYVN1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? 
Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb FARSB and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, PDIA6 is perturbed and SNHG12 expression is measured. Determine whether SNHG12 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of GMPPB, does the expression profile of RHCE indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DDIT3 and examine the expression of RGS16. Does perturbing DDIT3 lead to a significant change in RGS16 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. 
In K562 cells, DNAJC19 is perturbed and PHGDH expression is quantified. Does this perturbation result in a significant change in PHGDH expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, UFM1 is perturbed and the expression of CLCA1 is measured. Does this perturbation cause a significant change in CLCA1 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SOCS1 is perturbed and RPS27 expression is measured. Determine whether RPS27 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, IER3IP1 is perturbed and CLCA1 expression is measured. Determine whether CLCA1 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of COPB1, does the expression profile of CD52 indicate a significant change relative to control conditions? 
Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DAD1 and examine the expression of CLCA1. Does perturbing DAD1 lead to a significant change in CLCA1 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to CARS and then measure expression of FCGR2A. Does this perturbation cause a significant change in FCGR2A expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TIMM23 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb SAMM50 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. 
For K562 cells, assess whether perturbation of ZNF326 is associated with a significant change in RGS16 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of MARS, does the expression profile of FAM129A indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb MRGBP and examine the expression of RPS27. Does perturbing MRGBP lead to a significant change in RPS27 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SPCS3, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which AMIGO3 is perturbed and TXNIP expression is observed. Does this perturbation lead to a significant difference in TXNIP expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PDIA6 is perturbed and YTHDF2 expression is observed. Does this perturbation lead to a significant difference in YTHDF2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SAMM50, does the expression profile of FCGR2A indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SRP72 and examine the expression of RPS27. Does perturbing SRP72 lead to a significant change in RPS27 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, CARS is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': '\"\"\"\\nUse the context from `{question}` to determine the best answer. If the answer is uncertain or conflicting, clearly indicate this. Ensure that the answer is validated against reliable sources for accuracy before proceeding. Provide your final answer in a clear format, without extra commentary or reasoning.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. 
Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:41:06.369\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.823 | Total tokens: 13639244 | Current cost: $0.001 | Current tokens: 5165\u001b[0m\n", "\u001b[32m2026-01-01 17:41:08.105\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.823 | Total tokens: 13639339 | Current cost: $0.000 | Current tokens: 95\u001b[0m\n", "\u001b[32m2026-01-01 17:41:09.526\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.823 | Total tokens: 13640202 | Current cost: $0.000 | Current tokens: 863\u001b[0m\n", "{'name': 'validate_answer2087', 'description': 'Task to validate_answer2087. Takes answer as input. Produces validated_answer as output.', 'inputs': [{'name': 'answer', 'type': 'str', 'description': 'Input parameter answer for validate_answer2087', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_answer2087', 'required': True}], 'prompt': '\"\"\"\\nThink step by step to answer the question based on the context provided in {question}. First, validate the context to ensure accuracy and relevance. If the answer is uncertain or ambiguous, clearly state that uncertainty. After validation, generate a straightforward answer that directly addresses {question}, considering any relevant nuances. 
Format your output in XML, using to explain your reasoning and for the final response. Ensure that the answer is concise and reflects any significant changes accurately, avoiding oversimplification.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:41:12.702\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.824 | Total tokens: 13645419 | Current cost: $0.001 | Current tokens: 5217\u001b[0m\n", "\u001b[32m2026-01-01 17:41:14.391\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.824 | Total tokens: 13645513 | Current cost: $0.000 | Current tokens: 94\u001b[0m\n", "\u001b[32m2026-01-01 17:41:17.397\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.824 | Total tokens: 13646778 | Current cost: $0.000 | Current tokens: 1265\u001b[0m\n", "{'name': 'contextualize_answer4593', 'description': 'Task to contextualize_answer4593. Takes validated_answer, question as input. 
Produces final_answer as output.', 'inputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Input parameter validated_answer for contextualize_answer4593', 'required': False}, {'name': 'question', 'type': 'str', 'description': 'Input parameter question for contextualize_answer4593', 'required': False}], 'outputs': [{'name': 'final_answer', 'type': 'str', 'description': 'Output parameter final_answer from contextualize_answer4593', 'required': True}], 'prompt': '```xml\\n\"\"\"\\nTo answer the question accurately, first generate an answer based on the provided {question}. Validate the generated answer by ensuring it aligns with the context of the {question} and meets the criteria for a \"validated answer,\" which includes correctness and relevance. If the generated answer does not significantly change the context or is incorrect, indicate this in your validation. In your thought process, clarify any assumptions made and how the context relates to the answer. Ensure that the final answer is presented in the format \\'Final Answer: Yes\\' or \\'Final Answer: No\\'. Provide your reasoning in the field and the final validated answer in the field.\\n\"\"\"\\n```', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:41:20.396\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.825 | Total tokens: 13652005 | Current cost: $0.001 | Current tokens: 5227\u001b[0m\n", "\u001b[32m2026-01-01 17:41:22.055\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.825 | Total tokens: 13652109 | Current cost: $0.000 | Current tokens: 104\u001b[0m\n", "\u001b[32m2026-01-01 17:41:24.939\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.825 | Total tokens: 13653443 | Current cost: $0.000 | Current tokens: 1334\u001b[0m\n", "\u001b[32m2026-01-01 17:41:24.939\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1049\u001b[0m - \u001b[1mEvaluate the workflow at step 7 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:01<00:50, 1.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:02<00:50, 1.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 
[00:03<00:51, 1.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:04<00:48, 1.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:05<00:44, 1.00it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:06<00:51, 1.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:07<00:44, 1.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:08<00:39, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:11<01:10, 1.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:12<01:01, 1.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:13<00:51, 1.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:14<00:50, 1.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:15<00:46, 1.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:16<00:39, 1.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:18<00:41, 1.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:19<00:37, 1.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:19<00:32, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:20<00:32, 1.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:21<00:30, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:23<00:36, 1.23s/it]" 
] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:24<00:33, 1.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:25<00:32, 1.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:26<00:28, 1.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:27<00:27, 1.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:28<00:26, 1.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:29<00:23, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:30<00:21, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:31<00:20, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:32<00:21, 1.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:33<00:18, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:33<00:17, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:34<00:16, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:35<00:15, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:36<00:13, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:37<00:12, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:38<00:12, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
74%|███████▍ | 37/50 [00:39<00:11, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:40<00:10, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:40<00:09, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:41<00:08, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:42<00:07, 1.25it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:43<00:06, 1.25it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:44<00:05, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:45<00:05, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:46<00:04, 1.13it/s]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:46<00:03, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:48<00:03, 1.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:49<00:01, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:50<00:01, 1.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:51<00:00, 1.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-01 17:42:16.332\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1052\u001b[0m - \u001b[1mStep 7 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.96}\u001b[0m\n", "randomly update dataset\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:42:20.781\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.832 | Total tokens: 13692105 | Current cost: $0.001 | Current tokens: 7979\u001b[0m\n", "- The workflow lacks 
a validation step after generating the answer, which could lead to incorrect outputs being used in subsequent steps.\n", "- There is no explicit handling of cases where the answer might be ambiguous or unsupported by the data, potentially leading to misleading conclusions.\n", "- The control flow does not account for scenarios where the validation of the answer fails, resulting in a lack of error handling or alternative pathways.\n", "- The prompts and intermediate steps do not specify the criteria for determining \"significant change,\" which could lead to inconsistencies in interpretation across different questions.\n", "\u001b[32m2026-01-01 17:42:22.100\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.832 | Total tokens: 13692754 | Current cost: $0.000 | Current tokens: 649\u001b[0m\n", "```python\n", "steps = [\n", "{'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", "{'name': 'validate_answer2087', 'args': ['answer'], 'outputs': ['validated_answer']},\n", "{'name': 'contextualize_answer4593', 'args': ['validated_answer', 'question'], 'outputs': ['final_answer']}\n", "]\n", "```\n", "Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, ZNF326 is perturbed and ZNF326 expression is measured. Determine whether ZNF326 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TMED2 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of IER3IP1, does the expression profile of FCER1G indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SPCS3, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which AMIGO3 is perturbed and TXNIP expression is observed. Does this perturbation lead to a significant difference in TXNIP expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TIMM23 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. 
For experiments carried out in K562 cells, we perturb SAMM50 and examine the expression of FCGR2A. Does perturbing SAMM50 lead to a significant change in FCGR2A expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TIMM23 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IER3IP1 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, HSD17B12 is perturbed and FCGR2A expression is quantified. Does this perturbation result in a significant change in FCGR2A expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SRP72 and examine the expression of RPS27. Does perturbing SRP72 lead to a significant change in RPS27 expression? 
Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DDIT3 is perturbed and RPS27 expression is quantified. Does this perturbation result in a significant change in RPS27 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of COPB1, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb NEDD8 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, IARS2 is perturbed and the expression of PHGDH is measured. Determine whether PHGDH shows a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SPCS2 is perturbed and the expression of SH3BGRL3 is measured. Determine whether SH3BGRL3 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SRPRB is perturbed and the expression of PPCS is measured. Determine whether PPCS shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, NEDD8 is perturbed and FCGR2A expression is quantified. Does this perturbation result in a significant change in FCGR2A expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, BHLHE40 is perturbed and SESN2 expression is measured. Determine whether SESN2 exhibits a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, CARS is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of COPZ1, does the expression profile of SH3BGRL3 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which SRPRB is perturbed and PPCS expression is observed. Does this perturbation lead to a significant difference in PPCS expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SEC63 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. 
In K562 cells, we introduce a perturbation to TIMM23 and then measure expression of SH3BGRL3. Does this perturbation cause a significant change in SH3BGRL3 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which ATP5B is perturbed and FCER1G expression is observed. Does this perturbation lead to a significant difference in FCER1G expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SCYL1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of PPWD1 is associated with a significant change in CD52 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, SLMO2 is perturbed and the expression of FCGR2A is measured. Does this perturbation cause a significant change in FCGR2A expression? 
Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, EIF2B2 is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SAMM50, does the expression profile of FCGR2A indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb TTI1 and examine the expression of RHCE. Does perturbing TTI1 lead to a significant change in RHCE expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, IER3IP1 is perturbed and CLCA1 expression is measured. Determine whether CLCA1 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. 
For experiments performed in K562 cells, P4HB is perturbed and RHCE expression is measured. Determine whether RHCE exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, UFM1 is perturbed and the expression of CLCA1 is measured. Does this perturbation cause a significant change in CLCA1 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of MARS, does the expression profile of FAM129A indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of SEL1L is associated with a significant change in TXNIP expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, KCTD16 is perturbed and the expression of SNHG12 is measured. Does this perturbation cause a significant change in SNHG12 expression? 
Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb IER3IP1 and monitor FCER1G expression. Decide whether this perturbation leads to a significant alteration in FCER1G expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of GMPPB, does the expression profile of RHCE indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, STT3A is perturbed and the expression of FCER1G is measured. Determine whether FCER1G shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to CARS and then measure expression of FCGR2A. Does this perturbation cause a significant change in FCGR2A expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb PSMD4 and monitor PSMD4 expression. 
Decide whether this perturbation leads to a significant alteration in PSMD4 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, PDIA6 is perturbed and SNHG12 expression is measured. Determine whether SNHG12 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SYVN1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SYVN1 is perturbed and YTHDF2 expression is quantified. Does this perturbation result in a significant change in YTHDF2 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SLMO2 and then measure expression of PHGDH. Does this perturbation cause a significant change in PHGDH expression? 
Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb DAD1 and monitor SNHG12 expression. Decide whether this perturbation leads to a significant alteration in SNHG12 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DDIT3 and examine the expression of RGS16. Does perturbing DDIT3 lead to a significant change in RGS16 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ASCC3 is associated with a significant change in SH3BGRL3 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PPWD1 is perturbed and CLCA1 expression is observed. Does this perturbation lead to a significant difference in CLCA1 expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': '\"\"\"\\nUse the context from `{question}` to determine the best answer. If the answer is uncertain or conflicting, clearly indicate this. Ensure that the answer is validated against reliable sources for accuracy before proceeding. Provide your final answer in a clear format, without extra commentary or reasoning.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. 
Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:42:24.794\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.832 | Total tokens: 13697938 | Current cost: $0.001 | Current tokens: 5184\u001b[0m\n", "\u001b[32m2026-01-01 17:42:25.806\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.833 | Total tokens: 13698032 | Current cost: $0.000 | Current tokens: 94\u001b[0m\n", "\u001b[32m2026-01-01 17:42:27.143\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.833 | Total tokens: 13698962 | Current cost: $0.000 | Current tokens: 930\u001b[0m\n", "{'name': 'validate_answer2087', 'description': 'Task to validate_answer2087. Takes answer as input. Produces validated_answer as output.', 'inputs': [{'name': 'answer', 'type': 'str', 'description': 'Input parameter answer for validate_answer2087', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_answer2087', 'required': True}], 'prompt': '\"\"\"\\nThink step by step to answer the question based on the context provided in {question}. First, validate the context to ensure accuracy and relevance. If the answer is uncertain or ambiguous, clearly state that uncertainty. After validation, generate a straightforward answer that directly addresses {question}, considering any relevant nuances. 
Format your output in XML, using to explain your reasoning and for the final response. Ensure that the answer is concise and reflects any significant changes accurately, avoiding oversimplification.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:42:29.708\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.834 | Total tokens: 13704184 | Current cost: $0.001 | Current tokens: 5222\u001b[0m\n", "\u001b[32m2026-01-01 17:42:30.691\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.834 | Total tokens: 13704288 | Current cost: $0.000 | Current tokens: 104\u001b[0m\n", "\u001b[32m2026-01-01 17:42:32.743\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.834 | Total tokens: 13705665 | Current cost: $0.000 | Current tokens: 1377\u001b[0m\n", "{'name': 'contextualize_answer4593', 'description': 'Task to contextualize_answer4593. Takes validated_answer, question as input. 
Produces final_answer as output.', 'inputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Input parameter validated_answer for contextualize_answer4593', 'required': False}, {'name': 'question', 'type': 'str', 'description': 'Input parameter question for contextualize_answer4593', 'required': False}], 'outputs': [{'name': 'final_answer', 'type': 'str', 'description': 'Output parameter final_answer from contextualize_answer4593', 'required': True}], 'prompt': '```xml\\n\"\"\"\\nTo answer the question accurately, first generate an answer based on the provided {question}. Validate the generated answer by ensuring it aligns with the context of the {question} and meets the criteria for a \"validated answer,\" which includes correctness and relevance. If the generated answer does not significantly change the context or is incorrect, indicate this in your validation. In your thought process, clarify any assumptions made and how the context relates to the answer. Ensure that the final answer is presented in the format \\'Final Answer: Yes\\' or \\'Final Answer: No\\'. Provide your reasoning in the field and the final validated answer in the field.\\n\"\"\"\\n```', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:42:34.921\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.835 | Total tokens: 13710879 | Current cost: $0.001 | Current tokens: 5214\u001b[0m\n", "\u001b[32m2026-01-01 17:42:36.462\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.835 | Total tokens: 13710984 | Current cost: $0.000 | Current tokens: 105\u001b[0m\n", "\u001b[32m2026-01-01 17:42:39.321\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.835 | Total tokens: 13712420 | Current cost: $0.000 | Current tokens: 1436\u001b[0m\n", "\u001b[32m2026-01-01 17:42:39.321\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1049\u001b[0m - \u001b[1mEvaluate the workflow at step 8 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:00<00:41, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:01<00:43, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 
[00:02<00:38, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:03<00:34, 1.35it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:03<00:34, 1.32it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:04<00:37, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:06<00:40, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:07<00:40, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:07<00:36, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:08<00:34, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:09<00:37, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:10<00:38, 1.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:12<00:40, 1.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:13<00:36, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:13<00:33, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:14<00:32, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:15<00:32, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:16<00:32, 1.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:18<00:32, 1.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:18<00:28, 1.07it/s]" 
] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:19<00:25, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:20<00:23, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:20<00:21, 1.25it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:21<00:20, 1.25it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:22<00:20, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:23<00:20, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:24<00:18, 1.24it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:25<00:20, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:26<00:20, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:27<00:17, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:28<00:16, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:29<00:16, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:30<00:16, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:30<00:14, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:31<00:12, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:33<00:14, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
74%|███████▍ | 37/50 [00:33<00:12, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:34<00:11, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:35<00:09, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:36<00:08, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:37<00:07, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:38<00:07, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:38<00:05, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:39<00:05, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:40<00:04, 1.20it/s]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:41<00:03, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:42<00:02, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:43<00:01, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:43<00:00, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:44<00:00, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-01 17:43:23.976\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1052\u001b[0m - \u001b[1mStep 8 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.92}\u001b[0m\n", "randomly update dataset\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:43:27.913\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.841 | Total tokens: 13751136 | Current cost: $0.001 | Current tokens: 8030\u001b[0m\n", "- The workflow lacks 
a validation step after generating the answer, which could lead to incorrect outputs being used in subsequent steps.\n", "- There is no error handling or fallback mechanism in case the validation fails or the answer is deemed invalid.\n", "- The workflow assumes that all questions can be answered with a simple 'Yes' or 'No' without considering the complexity or variability of the underlying data.\n", "- The execution history shows multiple instances where the predicted answers were incorrect, indicating potential flaws in the answer generation process or the underlying data analysis.\n", "- The workflow does not specify how the answer is generated from the question, which could lead to ambiguity in understanding the basis for the answer provided.\n", "- There is a lack of clarity on how the validation process determines the correctness of the answer, which could result in misleading outputs.\n", "\u001b[32m2026-01-01 17:43:30.647\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.841 | Total tokens: 13751836 | Current cost: $0.000 | Current tokens: 700\u001b[0m\n", "```python\n", "steps = [\n", "{'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", "{'name': 'validate_answer2087', 'args': ['answer'], 'outputs': ['validated_answer']},\n", "{'name': 'contextualize_answer4593', 'args': ['validated_answer', 'question'], 'outputs': ['final_answer']}\n", "]\n", "```\n", "Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DAD1 and examine the expression of CLCA1. Does perturbing DAD1 lead to a significant change in CLCA1 expression? 
Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which AMIGO3 is perturbed and TXNIP expression is observed. Does this perturbation lead to a significant difference in TXNIP expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb TTI1 and examine the expression of RHCE. Does perturbing TTI1 lead to a significant change in RHCE expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, SLMO2 is perturbed and the expression of FCGR2A is measured. Does this perturbation cause a significant change in FCGR2A expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, ARHGAP22 is perturbed and the expression of SH3BGRL3 is measured. Does this perturbation cause a significant change in SH3BGRL3 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. 
For K562 cells, assess whether perturbation of SEL1L is associated with a significant change in TXNIP expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, SLC35B1 is perturbed and the expression of FCER1G is measured. Does this perturbation cause a significant change in FCER1G expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DDIT3 and examine the expression of RGS16. Does perturbing DDIT3 lead to a significant change in RGS16 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of TELO2, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TTI1 is perturbed and the expression of CD52 is measured. Determine whether CD52 shows a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SCYL1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of COPB1, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb IER3IP1 and monitor FCER1G expression. Decide whether this perturbation leads to a significant alteration in FCER1G expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to UFL1 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. 
For experiments carried out in K562 cells, we perturb HYOU1 and examine the expression of SNHG12. Does perturbing HYOU1 lead to a significant change in SNHG12 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TTI1 is perturbed and the expression of RHCE is measured. Determine whether RHCE shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of ARHGAP22, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SRP68 is perturbed and FCER1G expression is quantified. Does this perturbation result in a significant change in FCER1G expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb DAD1 and monitor SNHG12 expression. Decide whether this perturbation leads to a significant alteration in SNHG12 expression. 
Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, P4HB is perturbed and RHCE expression is quantified. Does this perturbation result in a significant change in RHCE expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IDH3A is perturbed and SNHG12 expression is observed. Does this perturbation lead to a significant difference in SNHG12 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb SAMM50 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, TMEM167A is perturbed and the expression of CD52 is measured. Does this perturbation cause a significant change in CD52 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. 
Consider data from K562 cells in which PPWD1 is perturbed and CLCA1 expression is observed. Does this perturbation lead to a significant difference in CLCA1 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, UFM1 is perturbed and the expression of CLCA1 is measured. Does this perturbation cause a significant change in CLCA1 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IER3IP1 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb MRGBP and examine the expression of RPS27. Does perturbing MRGBP lead to a significant change in RPS27 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which BHLHE40 is perturbed and SESN2 expression is observed. Does this perturbation lead to a significant difference in SESN2 expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SYVN1 is perturbed and YTHDF2 expression is quantified. Does this perturbation result in a significant change in YTHDF2 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of PPWD1 is associated with a significant change in CD52 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, NEDD8 is perturbed and FCGR2A expression is quantified. Does this perturbation result in a significant change in FCGR2A expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of IER3IP1, does the expression profile of FCER1G indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. 
In K562 cells, we perturb NEDD8 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of HARS, does the expression profile of PHGDH indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SRP68 and then measure expression of PPCS. Does this perturbation cause a significant change in PPCS expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TIMM23 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to TIMM23 and then measure expression of SH3BGRL3. Does this perturbation cause a significant change in SH3BGRL3 expression? 
Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, HSD17B12 is perturbed and CD52 expression is measured. Determine whether CD52 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, BHLHE40 is perturbed and SESN2 expression is measured. Determine whether SESN2 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, CARS is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to DERL2 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. 
For experiments carried out in K562 cells, we perturb SRP72 and examine the expression of RPS27. Does perturbing SRP72 lead to a significant change in RPS27 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, EIF2B2 is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, P4HB is perturbed and RHCE expression is measured. Determine whether RHCE exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to MRGBP and then measure expression of RPS27. Does this perturbation cause a significant change in RPS27 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of MARS, does the expression profile of FAM129A indicate a significant change relative to control conditions? 
Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SLMO2 and then measure expression of PHGDH. Does this perturbation cause a significant change in PHGDH expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SOCS1 is perturbed and RPS27 expression is measured. Determine whether RPS27 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SPCS2 is perturbed and the expression of SH3BGRL3 is measured. Determine whether SH3BGRL3 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SPCS3, does the expression profile of CD52 indicate a significant change relative to control conditions? 
Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': '\"\"\"\\nUse the context from `{question}` to determine the best answer. If the answer is uncertain or conflicting, clearly indicate this. Ensure that the answer is validated against reliable sources for accuracy before proceeding. Provide your final answer in a clear format, without extra commentary or reasoning.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. 
Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:43:33.271\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.842 | Total tokens: 13757032 | Current cost: $0.001 | Current tokens: 5196\u001b[0m\n", "\u001b[32m2026-01-01 17:43:34.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.842 | Total tokens: 13757129 | Current cost: $0.000 | Current tokens: 97\u001b[0m\n", "\u001b[32m2026-01-01 17:43:35.927\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.842 | Total tokens: 13758116 | Current cost: $0.000 | Current tokens: 987\u001b[0m\n", "{'name': 'validate_answer2087', 'description': 'Task to validate_answer2087. Takes answer as input. Produces validated_answer as output.', 'inputs': [{'name': 'answer', 'type': 'str', 'description': 'Input parameter answer for validate_answer2087', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_answer2087', 'required': True}], 'prompt': '\"\"\"\\nThink step by step to answer the question based on the context provided in {question}. First, validate the context to ensure accuracy and relevance. If the answer is uncertain or ambiguous, clearly state that uncertainty. After validation, generate a straightforward answer that directly addresses {question}, considering any relevant nuances. 
Format your output in XML, using to explain your reasoning and for the final response. Ensure that the answer is concise and reflects any significant changes accurately, avoiding oversimplification.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:43:39.035\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.843 | Total tokens: 13763356 | Current cost: $0.001 | Current tokens: 5240\u001b[0m\n", "\u001b[32m2026-01-01 17:43:40.575\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.843 | Total tokens: 13763448 | Current cost: $0.000 | Current tokens: 92\u001b[0m\n", "\u001b[32m2026-01-01 17:43:42.210\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.844 | Total tokens: 13764936 | Current cost: $0.000 | Current tokens: 1488\u001b[0m\n", "{'name': 'contextualize_answer4593', 'description': 'Task to contextualize_answer4593. Takes validated_answer, question as input. 
Produces final_answer as output.', 'inputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Input parameter validated_answer for contextualize_answer4593', 'required': False}, {'name': 'question', 'type': 'str', 'description': 'Input parameter question for contextualize_answer4593', 'required': False}], 'outputs': [{'name': 'final_answer', 'type': 'str', 'description': 'Output parameter final_answer from contextualize_answer4593', 'required': True}], 'prompt': '```xml\\n\"\"\"\\nTo answer the question accurately, first generate an answer based on the provided {question}. Validate the generated answer by ensuring it aligns with the context of the {question} and meets the criteria for a \"validated answer,\" which includes correctness and relevance. If the generated answer does not significantly change the context or is incorrect, indicate this in your validation. In your thought process, clarify any assumptions made and how the context relates to the answer. Ensure that the final answer is presented in the format \\'Final Answer: Yes\\' or \\'Final Answer: No\\'. Provide your reasoning in the field and the final validated answer in the field.\\n\"\"\"\\n```', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:43:44.884\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.844 | Total tokens: 13770185 | Current cost: $0.001 | Current tokens: 5249\u001b[0m\n", "\u001b[32m2026-01-01 17:43:46.264\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.844 | Total tokens: 13770279 | Current cost: $0.000 | Current tokens: 94\u001b[0m\n", "\u001b[32m2026-01-01 17:43:48.353\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.845 | Total tokens: 13771855 | Current cost: $0.000 | Current tokens: 1576\u001b[0m\n", "\u001b[32m2026-01-01 17:43:48.353\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1049\u001b[0m - \u001b[1mEvaluate the workflow at step 9 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:00<00:44, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:02<00:51, 1.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 
[00:02<00:44, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:04<00:51, 1.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:05<00:47, 1.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:06<00:44, 1.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:06<00:39, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:07<00:35, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:08<00:34, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:09<00:32, 1.25it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:09<00:32, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:10<00:30, 1.24it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:11<00:32, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:12<00:30, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:13<00:29, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:14<00:28, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:14<00:25, 1.28it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:15<00:24, 1.29it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:16<00:27, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:17<00:29, 1.01it/s]" 
] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:19<00:30, 1.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:20<00:27, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:21<00:31, 1.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:22<00:27, 1.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:24<00:31, 1.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:25<00:29, 1.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:26<00:26, 1.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:27<00:23, 1.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:28<00:22, 1.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:28<00:19, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:29<00:16, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:30<00:16, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:31<00:16, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:32<00:15, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:33<00:14, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:34<00:13, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
74%|███████▍ | 37/50 [00:35<00:11, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:36<00:11, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:37<00:09, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:38<00:09, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:38<00:07, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:39<00:06, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:40<00:05, 1.25it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:41<00:05, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:42<00:04, 1.05it/s]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:43<00:03, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:43<00:02, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:44<00:01, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:45<00:00, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:46<00:00, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-01 17:44:34.678\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1052\u001b[0m - \u001b[1mStep 9 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.94}\u001b[0m\n", "randomly update dataset\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:44:37.975\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.851 | Total tokens: 13810525 | Current cost: $0.001 | Current tokens: 8025\u001b[0m\n", "- The workflow lacks 
a validation step that checks for consistency between the predicted answer and the actual solution, leading to potential discrepancies in the final output.\n", "- There are multiple instances of incorrect computation results that resulted in a score of 0.0, indicating a failure in the workflow to handle certain perturbation scenarios effectively.\n", "- The workflow does not include a mechanism to address or log errors encountered during the execution, which could help in understanding the reasons behind incorrect predictions.\n", "- The workflow assumes that all questions can be answered with a binary response without considering the complexity or nuances of the biological context, which may lead to oversimplification of the answers.\n", "- The ordering of steps could be improved; validation should ideally occur before contextualization to ensure that only validated answers are further processed.\n", "\u001b[32m2026-01-01 17:44:39.235\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.851 | Total tokens: 13811220 | Current cost: $0.000 | Current tokens: 695\u001b[0m\n", "```python\n", "steps = [\n", "{'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", "{'name': 'validate_answer2087', 'args': ['answer'], 'outputs': ['validated_answer']},\n", "{'name': 'contextualize_answer4593', 'args': ['validated_answer', 'question'], 'outputs': ['final_answer']}\n", "]\n", "```\n", "Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, CHERP is perturbed and NPL expression is quantified. Does this perturbation result in a significant change in NPL expression compared with control cells? 
Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TMED2 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TTI1 is perturbed and the expression of CD52 is measured. Determine whether CD52 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of SEL1L is associated with a significant change in TXNIP expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SPCS2 is perturbed and the expression of SH3BGRL3 is measured. Determine whether SH3BGRL3 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. 
In K562 cells, SLC35B1 is perturbed and the expression of FCER1G is measured. Does this perturbation cause a significant change in FCER1G expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SRP68 is perturbed and FCER1G expression is quantified. Does this perturbation result in a significant change in FCER1G expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SRP72 and examine the expression of RPS27. Does perturbing SRP72 lead to a significant change in RPS27 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SYVN1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb TTI1 and examine the expression of RHCE. Does perturbing TTI1 lead to a significant change in RHCE expression? 
Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SRPRB is perturbed and the expression of PPCS is measured. Determine whether PPCS shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, BHLHE40 is perturbed and SESN2 expression is measured. Determine whether SESN2 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of GMPPB, does the expression profile of RHCE indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to CARS and then measure expression of FCGR2A. Does this perturbation cause a significant change in FCGR2A expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. 
In K562 cells, SLMO2 is perturbed and the expression of FCGR2A is measured. Does this perturbation cause a significant change in FCGR2A expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, HSD17B12 is perturbed and FCGR2A expression is quantified. Does this perturbation result in a significant change in FCGR2A expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which SRPRB is perturbed and PPCS expression is observed. Does this perturbation lead to a significant difference in PPCS expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, EIF2B2 is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, ZNF326 is perturbed and ZNF326 expression is measured. Determine whether ZNF326 exhibits a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SOCS1 is perturbed and RPS27 expression is measured. Determine whether RPS27 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of COPB1, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DAD1 and examine the expression of CLCA1. Does perturbing DAD1 lead to a significant change in CLCA1 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DNAJC19 is perturbed and PHGDH expression is quantified. Does this perturbation result in a significant change in PHGDH expression compared with control cells? 
Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TIMM23 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of IER3IP1, does the expression profile of FCER1G indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, KCTD16 is perturbed and the expression of SNHG12 is measured. Does this perturbation cause a significant change in SNHG12 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PPWD1 is perturbed and CLCA1 expression is observed. Does this perturbation lead to a significant difference in CLCA1 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. 
In K562 cells, we introduce a perturbation to TIMM23 and then measure expression of SH3BGRL3. Does this perturbation cause a significant change in SH3BGRL3 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SRP68 and then measure expression of PPCS. Does this perturbation cause a significant change in PPCS expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, IARS2 is perturbed and the expression of PHGDH is measured. Determine whether PHGDH shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, PDIA6 is perturbed and SNHG12 expression is measured. Determine whether SNHG12 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ASCC3 is associated with a significant change in SH3BGRL3 expression compared with unperturbed controls. 
Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IDH3A is perturbed and SNHG12 expression is observed. Does this perturbation lead to a significant difference in SNHG12 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, GBF1 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SAMM50, does the expression profile of FCGR2A indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of HARS, does the expression profile of PHGDH indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. 
In K562 cells, ARHGAP22 is perturbed and the expression of SH3BGRL3 is measured. Does this perturbation cause a significant change in SH3BGRL3 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of COPZ1, does the expression profile of SH3BGRL3 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb DAD1 and monitor SNHG12 expression. Decide whether this perturbation leads to a significant alteration in SNHG12 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DDIT3 and examine the expression of RGS16. Does perturbing DDIT3 lead to a significant change in RGS16 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of TELO2, does the expression profile of CD52 indicate a significant change relative to control conditions? 
Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of ARHGAP22, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ZNF326 is associated with a significant change in RGS16 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb PSMD4 and monitor PSMD4 expression. Decide whether this perturbation leads to a significant alteration in PSMD4 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DDIT3 is perturbed and RPS27 expression is quantified. Does this perturbation result in a significant change in RPS27 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert analyst of perturbation datasets. 
For K562 cells subjected to perturbation of SPCS3, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SAMM50 and examine the expression of FCGR2A. Does perturbing SAMM50 lead to a significant change in FCGR2A expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of MARS, does the expression profile of FAM129A indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, CARS is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SEC63 is perturbed and CD52 expression is measured. Determine whether CD52 exhibits a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': '\"\"\"\\nUse the context from `{question}` to determine the best answer. If the answer is uncertain or conflicting, clearly indicate this. Ensure that the answer is validated against reliable sources for accuracy before proceeding. Provide your final answer in a clear format, without extra commentary or reasoning.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. 
Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:44:42.137\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.852 | Total tokens: 13816369 | Current cost: $0.001 | Current tokens: 5149\u001b[0m\n", "\u001b[32m2026-01-01 17:44:43.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.852 | Total tokens: 13816468 | Current cost: $0.000 | Current tokens: 99\u001b[0m\n", "\u001b[32m2026-01-01 17:44:44.249\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.852 | Total tokens: 13817511 | Current cost: $0.000 | Current tokens: 1043\u001b[0m\n", "{'name': 'validate_answer2087', 'description': 'Task to validate_answer2087. Takes answer as input. Produces validated_answer as output.', 'inputs': [{'name': 'answer', 'type': 'str', 'description': 'Input parameter answer for validate_answer2087', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_answer2087', 'required': True}], 'prompt': '\"\"\"\\nThink step by step to answer the question based on the context provided in {question}. First, validate the context to ensure accuracy and relevance. If the answer is uncertain or ambiguous, clearly state that uncertainty. After validation, generate a straightforward answer that directly addresses {question}, considering any relevant nuances. 
Format your output in XML, using to explain your reasoning and for the final response. Ensure that the answer is concise and reflects any significant changes accurately, avoiding oversimplification.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:44:46.602\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.853 | Total tokens: 13822698 | Current cost: $0.001 | Current tokens: 5187\u001b[0m\n", "\u001b[32m2026-01-01 17:44:47.499\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.853 | Total tokens: 13822801 | Current cost: $0.000 | Current tokens: 103\u001b[0m\n", "\u001b[32m2026-01-01 17:44:49.622\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.853 | Total tokens: 13824408 | Current cost: $0.000 | Current tokens: 1607\u001b[0m\n", "{'name': 'contextualize_answer4593', 'description': 'Task to contextualize_answer4593. Takes validated_answer, question as input. 
Produces final_answer as output.', 'inputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Input parameter validated_answer for contextualize_answer4593', 'required': False}, {'name': 'question', 'type': 'str', 'description': 'Input parameter question for contextualize_answer4593', 'required': False}], 'outputs': [{'name': 'final_answer', 'type': 'str', 'description': 'Output parameter final_answer from contextualize_answer4593', 'required': True}], 'prompt': '```xml\\n\"\"\"\\nTo answer the question accurately, first generate an answer based on the provided {question}. Validate the generated answer by ensuring it aligns with the context of the {question} and meets the criteria for a \"validated answer,\" which includes correctness and relevance. If the generated answer does not significantly change the context or is incorrect, indicate this in your validation. In your thought process, clarify any assumptions made and how the context relates to the answer. Ensure that the final answer is presented in the format \\'Final Answer: Yes\\' or \\'Final Answer: No\\'. Provide your reasoning in the field and the final validated answer in the field.\\n\"\"\"\\n```', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:44:53.013\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.854 | Total tokens: 13829602 | Current cost: $0.001 | Current tokens: 5194\u001b[0m\n", "\u001b[32m2026-01-01 17:44:54.008\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.854 | Total tokens: 13829698 | Current cost: $0.000 | Current tokens: 96\u001b[0m\n", "\u001b[32m2026-01-01 17:44:55.999\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.854 | Total tokens: 13831399 | Current cost: $0.000 | Current tokens: 1701\u001b[0m\n", "\u001b[32m2026-01-01 17:44:56.000\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1049\u001b[0m - \u001b[1mEvaluate the workflow at step 10 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:00<00:39, 1.26it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:01<00:39, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 
[00:02<00:37, 1.26it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:03<00:38, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:03<00:34, 1.30it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:04<00:33, 1.33it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:06<00:46, 1.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:07<00:45, 1.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:08<00:39, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:09<00:36, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:09<00:33, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:10<00:32, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:11<00:33, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:12<00:32, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:13<00:30, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:14<00:30, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:15<00:31, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:17<00:43, 1.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:19<00:42, 1.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:19<00:37, 1.24s/it]" 
] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:20<00:31, 1.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:21<00:27, 1.00it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:22<00:25, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:23<00:23, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:24<00:22, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:24<00:21, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:25<00:19, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:26<00:22, 1.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:27<00:20, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:28<00:18, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:29<00:18, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:30<00:16, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:31<00:13, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:31<00:13, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:33<00:14, 1.00it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:34<00:14, 1.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
74%|███████▍ | 37/50 [00:35<00:13, 1.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:36<00:11, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:37<00:10, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:38<00:10, 1.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:39<00:09, 1.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:40<00:08, 1.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:41<00:07, 1.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:43<00:06, 1.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:43<00:05, 1.02s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:44<00:03, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:45<00:02, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:46<00:01, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:48<00:01, 1.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:48<00:00, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-01 17:45:44.755\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1052\u001b[0m - \u001b[1mStep 10 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.9}\u001b[0m\n", "randomly update dataset\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:45:48.810\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.861 | Total tokens: 13870104 | Current cost: $0.001 | Current tokens: 8025\u001b[0m\n", "- The workflow lacks 
a validation step that checks for the consistency of the question and the expected answer format, which could lead to errors in interpretation.\n", "- There are instances of incorrect computation results leading to discrepancies between predictions and solutions, indicating potential flaws in the answer generation or validation process.\n", "- The workflow does not account for the possibility of ambiguous or poorly defined questions, which could result in misinterpretation of the required answer format.\n", "- The control flow does not include a mechanism for handling cases where the answer is not strictly 'Yes' or 'No', which could lead to premature termination or incorrect outputs.\n", "- The repeated occurrence of incorrect solutions suggests a systemic issue in the underlying model or data used for generating answers, indicating a need for more robust validation of the answers produced.\n", "\u001b[32m2026-01-01 17:45:50.444\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.861 | Total tokens: 13870799 | Current cost: $0.000 | Current tokens: 695\u001b[0m\n", "```python\n", "steps = [\n", "{'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", "{'name': 'validate_answer2087', 'args': ['answer'], 'outputs': ['validated_answer']},\n", "{'name': 'contextualize_answer4593', 'args': ['validated_answer', 'question'], 'outputs': ['final_answer']}\n", "]\n", "```\n", "Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to CARS and then measure expression of FCGR2A. Does this perturbation cause a significant change in FCGR2A expression? 
Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SOCS1 is perturbed and RPS27 expression is measured. Determine whether RPS27 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, ARHGAP22 is perturbed and the expression of SH3BGRL3 is measured. Does this perturbation cause a significant change in SH3BGRL3 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, KCTD16 is perturbed and the expression of SNHG12 is measured. Does this perturbation cause a significant change in SNHG12 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, TMEM167A is perturbed and the expression of CD52 is measured. Does this perturbation cause a significant change in CD52 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. 
For experiments performed in K562 cells, ZNF326 is perturbed and ZNF326 expression is measured. Determine whether ZNF326 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SYVN1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SRP68 and then measure expression of PPCS. Does this perturbation cause a significant change in PPCS expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PDIA6 is perturbed and YTHDF2 expression is observed. Does this perturbation lead to a significant difference in YTHDF2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, SLMO2 is perturbed and the expression of FCGR2A is measured. Does this perturbation cause a significant change in FCGR2A expression? 
Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TIMM23 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, SLC35B1 is perturbed and the expression of FCER1G is measured. Does this perturbation cause a significant change in FCER1G expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb NEDD8 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to TIMM23 and then measure expression of SH3BGRL3. Does this perturbation cause a significant change in SH3BGRL3 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. 
For experiments carried out in K562 cells, we perturb DAD1 and examine the expression of CLCA1. Does perturbing DAD1 lead to a significant change in CLCA1 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IDH3A is perturbed and SNHG12 expression is observed. Does this perturbation lead to a significant difference in SNHG12 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which AMIGO3 is perturbed and TXNIP expression is observed. Does this perturbation lead to a significant difference in TXNIP expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb PSMD4 and monitor PSMD4 expression. Decide whether this perturbation leads to a significant alteration in PSMD4 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, P4HB is perturbed and RHCE expression is measured. Determine whether RHCE exhibits a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SRPRB is perturbed and the expression of PPCS is measured. Determine whether PPCS shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which SRPRB is perturbed and PPCS expression is observed. Does this perturbation lead to a significant difference in PPCS expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ZNF326 is associated with a significant change in RGS16 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of ARHGAP22, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. 
For experiments carried out in K562 cells, we perturb TTI1 and examine the expression of RHCE. Does perturbing TTI1 lead to a significant change in RHCE expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb SAMM50 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to MRGBP and then measure expression of RPS27. Does this perturbation cause a significant change in RPS27 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb MRGBP and examine the expression of RPS27. Does perturbing MRGBP lead to a significant change in RPS27 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of TELO2, does the expression profile of CD52 indicate a significant change relative to control conditions? 
Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of HARS, does the expression profile of PHGDH indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SLC35B1 and examine the expression of RHCE. Does perturbing SLC35B1 lead to a significant change in RHCE expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of COPZ1, does the expression profile of SH3BGRL3 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, CARS is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. 
In K562 cells, DNAJC19 is perturbed and PHGDH expression is quantified. Does this perturbation result in a significant change in PHGDH expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DDIT3 and examine the expression of RGS16. Does perturbing DDIT3 lead to a significant change in RGS16 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PPWD1 is perturbed and CLCA1 expression is observed. Does this perturbation lead to a significant difference in CLCA1 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SRP68 is perturbed and FCER1G expression is quantified. Does this perturbation result in a significant change in FCER1G expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb DAD1 and monitor SNHG12 expression. Decide whether this perturbation leads to a significant alteration in SNHG12 expression. 
Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SEC63 is perturbed and CD52 expression is measured. Determine whether CD52 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, BHLHE40 is perturbed and SESN2 expression is measured. Determine whether SESN2 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of PPWD1 is associated with a significant change in CD52 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: No\n", "Solutions: Yes\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb FARSB and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. 
Consider data from K562 cells in which IER3IP1 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of SEL1L is associated with a significant change in TXNIP expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DDIT3 is perturbed and RPS27 expression is quantified. Does this perturbation result in a significant change in RPS27 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, P4HB is perturbed and RHCE expression is quantified. Does this perturbation result in a significant change in RHCE expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TIMM23 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb IER3IP1 and monitor FCER1G expression. Decide whether this perturbation leads to a significant alteration in FCER1G expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of MARS, does the expression profile of FAM129A indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of IER3IP1, does the expression profile of FCER1G indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SYVN1 is perturbed and YTHDF2 expression is quantified. Does this perturbation result in a significant change in YTHDF2 expression compared with control cells? 
Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': '\"\"\"\\nUse the context from `{question}` to determine the best answer. If the answer is uncertain or conflicting, clearly indicate this. Ensure that the answer is validated against reliable sources for accuracy before proceeding. Provide your final answer in a clear format, without extra commentary or reasoning.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. 
Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:45:53.881\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.862 | Total tokens: 13875994 | Current cost: $0.001 | Current tokens: 5195\u001b[0m\n", "\u001b[32m2026-01-01 17:45:55.776\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.862 | Total tokens: 13876102 | Current cost: $0.000 | Current tokens: 108\u001b[0m\n", "\u001b[32m2026-01-01 17:45:57.213\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.862 | Total tokens: 13877226 | Current cost: $0.000 | Current tokens: 1124\u001b[0m\n", "{'name': 'validate_answer2087', 'description': 'Task to validate_answer2087. Takes answer as input. Produces validated_answer as output.', 'inputs': [{'name': 'answer', 'type': 'str', 'description': 'Input parameter answer for validate_answer2087', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_answer2087', 'required': True}], 'prompt': '\"\"\"\\nThink step by step to answer the question based on the context provided in {question}. First, validate the context to ensure accuracy and relevance. If the answer is uncertain or ambiguous, clearly state that uncertainty. After validation, generate a straightforward answer that directly addresses {question}, considering any relevant nuances. 
Format your output in XML, using to explain your reasoning and for the final response. Ensure that the answer is concise and reflects any significant changes accurately, avoiding oversimplification.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:45:59.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.863 | Total tokens: 13882447 | Current cost: $0.001 | Current tokens: 5221\u001b[0m\n", "\u001b[32m2026-01-01 17:46:00.526\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.863 | Total tokens: 13882542 | Current cost: $0.000 | Current tokens: 95\u001b[0m\n", "\u001b[32m2026-01-01 17:46:02.606\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.863 | Total tokens: 13884248 | Current cost: $0.000 | Current tokens: 1706\u001b[0m\n", "{'name': 'contextualize_answer4593', 'description': 'Task to contextualize_answer4593. Takes validated_answer, question as input. 
Produces final_answer as output.', 'inputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Input parameter validated_answer for contextualize_answer4593', 'required': False}, {'name': 'question', 'type': 'str', 'description': 'Input parameter question for contextualize_answer4593', 'required': False}], 'outputs': [{'name': 'final_answer', 'type': 'str', 'description': 'Output parameter final_answer from contextualize_answer4593', 'required': True}], 'prompt': '```xml\\n\"\"\"\\nTo answer the question accurately, first generate an answer based on the provided {question}. Validate the generated answer by ensuring it aligns with the context of the {question} and meets the criteria for a \"validated answer,\" which includes correctness and relevance. If the generated answer does not significantly change the context or is incorrect, indicate this in your validation. In your thought process, clarify any assumptions made and how the context relates to the answer. Ensure that the final answer is presented in the format \\'Final Answer: Yes\\' or \\'Final Answer: No\\'. Provide your reasoning in the field and the final validated answer in the field.\\n\"\"\"\\n```', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:46:06.008\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.864 | Total tokens: 13889519 | Current cost: $0.001 | Current tokens: 5271\u001b[0m\n", "\u001b[32m2026-01-01 17:46:07.689\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.864 | Total tokens: 13889614 | Current cost: $0.000 | Current tokens: 95\u001b[0m\n", "\u001b[32m2026-01-01 17:46:10.574\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.864 | Total tokens: 13891504 | Current cost: $0.000 | Current tokens: 1890\u001b[0m\n", "\u001b[32m2026-01-01 17:46:10.574\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1049\u001b[0m - \u001b[1mEvaluate the workflow at step 11 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:00<00:38, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:01<00:47, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 
[00:02<00:42, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:03<00:36, 1.26it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:04<00:37, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:05<00:40, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:06<00:42, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:07<00:45, 1.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:08<00:45, 1.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:09<00:40, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:10<00:42, 1.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:12<00:42, 1.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:13<00:38, 1.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:13<00:35, 1.00it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:14<00:33, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:15<00:31, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:16<00:30, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:17<00:26, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:18<00:28, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:19<00:27, 1.09it/s]" 
] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:20<00:25, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:21<00:26, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:22<00:25, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:23<00:24, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:23<00:22, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:24<00:21, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:25<00:20, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:26<00:19, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:27<00:17, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:27<00:16, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:28<00:15, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:30<00:17, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:30<00:15, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:31<00:14, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:32<00:12, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:33<00:12, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
74%|███████▍ | 37/50 [00:34<00:11, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:35<00:10, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:35<00:09, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:36<00:08, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:37<00:07, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:38<00:06, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:39<00:06, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:40<00:05, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:41<00:04, 1.13it/s]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:41<00:03, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:42<00:02, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:43<00:01, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:44<00:00, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:45<00:00, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-01 17:46:55.983\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1052\u001b[0m - \u001b[1mStep 11 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.96}\u001b[0m\n", "randomly update dataset\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:46:59.114\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.871 | Total tokens: 13930209 | Current cost: $0.001 | Current tokens: 8021\u001b[0m\n", "- The workflow 
lacks a validation step for the initial question input, which could lead to processing incorrect or irrelevant questions.\n", "- There is no error handling mechanism in place to address cases where the validation of the answer fails or where the answer generation step produces unexpected results.\n", "- The workflow assumes that all questions will yield a binary response ('Yes' or 'No'), which may not account for ambiguous or unclear questions that require further clarification.\n", "- The final answer is derived from a single validation step without cross-referencing or corroborating evidence from the execution history, which could lead to inconsistencies in the final output.\n", "- The workflow does not include a mechanism to track or log the reasoning behind the answer generation, which could help in understanding discrepancies in predictions and solutions.\n", "\u001b[32m2026-01-01 17:47:00.620\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.871 | Total tokens: 13930928 | Current cost: $0.000 | Current tokens: 719\u001b[0m\n", "```python\n", "steps = [\n", " {'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", " {'name': 'validate_answer2087', 'args': ['answer'], 'outputs': ['validated_answer']},\n", " {'name': 'contextualize_answer4593', 'args': ['validated_answer', 'question'], 'outputs': ['final_answer']},\n", " {'name': 'log_reasoning', 'args': ['question', 'validated_answer'], 'outputs': []}\n", "]\n", "```\n", "Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SPCS2 is perturbed and the expression of SH3BGRL3 is measured. Determine whether SH3BGRL3 shows a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb SAMM50 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PDIA6 is perturbed and YTHDF2 expression is observed. Does this perturbation lead to a significant difference in YTHDF2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which SRPRB is perturbed and PPCS expression is observed. Does this perturbation lead to a significant difference in PPCS expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, KCTD16 is perturbed and the expression of SNHG12 is measured. Does this perturbation cause a significant change in SNHG12 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. 
In K562 cells, we introduce a perturbation to UFL1 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb PSMD4 and monitor PSMD4 expression. Decide whether this perturbation leads to a significant alteration in PSMD4 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to TIMM23 and then measure expression of SH3BGRL3. Does this perturbation cause a significant change in SH3BGRL3 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DDIT3 and examine the expression of RGS16. Does perturbing DDIT3 lead to a significant change in RGS16 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of GMPPB, does the expression profile of RHCE indicate a significant change relative to control conditions? 
Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, P4HB is perturbed and RHCE expression is measured. Determine whether RHCE exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, ARHGAP22 is perturbed and the expression of SH3BGRL3 is measured. Does this perturbation cause a significant change in SH3BGRL3 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, EIF2B2 is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of TELO2, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. 
In K562 cells, we perturb NEDD8 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which BHLHE40 is perturbed and SESN2 expression is observed. Does this perturbation lead to a significant difference in SESN2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, SLMO2 is perturbed and the expression of FCGR2A is measured. Does this perturbation cause a significant change in FCGR2A expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DAD1 and examine the expression of CLCA1. Does perturbing DAD1 lead to a significant change in CLCA1 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, GBF1 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of COPB1, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, PDIA6 is perturbed and SNHG12 expression is measured. Determine whether SNHG12 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SRP68 and then measure expression of PPCS. Does this perturbation cause a significant change in PPCS expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PPWD1 is perturbed and CLCA1 expression is observed. Does this perturbation lead to a significant difference in CLCA1 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. 
For experiments carried out in K562 cells, we perturb SLC35B1 and examine the expression of RHCE. Does perturbing SLC35B1 lead to a significant change in RHCE expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of ARHGAP22, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TIMM23 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb TTI1 and examine the expression of RHCE. Does perturbing TTI1 lead to a significant change in RHCE expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which AMIGO3 is perturbed and TXNIP expression is observed. Does this perturbation lead to a significant difference in TXNIP expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb HYOU1 and examine the expression of SNHG12. Does perturbing HYOU1 lead to a significant change in SNHG12 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IER3IP1 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SRPRB is perturbed and the expression of PPCS is measured. Determine whether PPCS shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, CHERP is perturbed and NPL expression is quantified. Does this perturbation result in a significant change in NPL expression compared with control cells? 
Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, CARS is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which ATP5B is perturbed and FCER1G expression is observed. Does this perturbation lead to a significant difference in FCER1G expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, P4HB is perturbed and RHCE expression is quantified. Does this perturbation result in a significant change in RHCE expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, IARS2 is perturbed and the expression of PHGDH is measured. Determine whether PHGDH shows a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SPCS3, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb DAD1 and monitor SNHG12 expression. Decide whether this perturbation leads to a significant alteration in SNHG12 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, BHLHE40 is perturbed and SESN2 expression is measured. Determine whether SESN2 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ZNF326 is associated with a significant change in RGS16 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. 
For K562 cells subjected to perturbation of IER3IP1, does the expression profile of FCER1G indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to DERL2 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, HSD17B12 is perturbed and CD52 expression is measured. Determine whether CD52 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb MRGBP and examine the expression of RPS27. Does perturbing MRGBP lead to a significant change in RPS27 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SRP68 is perturbed and FCER1G expression is quantified. Does this perturbation result in a significant change in FCER1G expression compared with control cells? 
Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, TMEM167A is perturbed and the expression of CD52 is measured. Does this perturbation cause a significant change in CD52 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of MARS, does the expression profile of FAM129A indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to MRGBP and then measure expression of RPS27. Does this perturbation cause a significant change in RPS27 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TTI1 is perturbed and the expression of RHCE is measured. Determine whether RHCE shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. 
In K562 cells, NEDD8 is perturbed and FCGR2A expression is quantified. Does this perturbation result in a significant change in FCGR2A expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': '\"\"\"\\nUse the context from `{question}` to determine the best answer. If the answer is uncertain or conflicting, clearly indicate this. Ensure that the answer is validated against reliable sources for accuracy before proceeding. Provide your final answer in a clear format, without extra commentary or reasoning.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. 
Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:47:03.106\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.872 | Total tokens: 13936139 | Current cost: $0.001 | Current tokens: 5211\u001b[0m\n", "\u001b[32m2026-01-01 17:47:04.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.872 | Total tokens: 13936238 | Current cost: $0.000 | Current tokens: 99\u001b[0m\n", "\u001b[32m2026-01-01 17:47:05.161\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.872 | Total tokens: 13937433 | Current cost: $0.000 | Current tokens: 1195\u001b[0m\n", "{'name': 'validate_answer2087', 'description': 'Task to validate_answer2087. Takes answer as input. Produces validated_answer as output.', 'inputs': [{'name': 'answer', 'type': 'str', 'description': 'Input parameter answer for validate_answer2087', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_answer2087', 'required': True}], 'prompt': '\"\"\"\\nThink step by step to answer the question based on the context provided in {question}. First, validate the context to ensure accuracy and relevance. If the answer is uncertain or ambiguous, clearly state that uncertainty. After validation, generate a straightforward answer that directly addresses {question}, considering any relevant nuances. 
Format your output in XML, using to explain your reasoning and for the final response. Ensure that the answer is concise and reflects any significant changes accurately, avoiding oversimplification.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:47:07.881\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.873 | Total tokens: 13942685 | Current cost: $0.001 | Current tokens: 5252\u001b[0m\n", "\u001b[32m2026-01-01 17:47:09.599\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.873 | Total tokens: 13942780 | Current cost: $0.000 | Current tokens: 95\u001b[0m\n", "\u001b[32m2026-01-01 17:47:12.681\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.873 | Total tokens: 13944641 | Current cost: $0.000 | Current tokens: 1861\u001b[0m\n", "{'name': 'contextualize_answer4593', 'description': 'Task to contextualize_answer4593. Takes validated_answer, question as input. 
Produces final_answer as output.', 'inputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Input parameter validated_answer for contextualize_answer4593', 'required': False}, {'name': 'question', 'type': 'str', 'description': 'Input parameter question for contextualize_answer4593', 'required': False}], 'outputs': [{'name': 'final_answer', 'type': 'str', 'description': 'Output parameter final_answer from contextualize_answer4593', 'required': True}], 'prompt': '```xml\\n\"\"\"\\nTo answer the question accurately, first generate an answer based on the provided {question}. Validate the generated answer by ensuring it aligns with the context of the {question} and meets the criteria for a \"validated answer,\" which includes correctness and relevance. If the generated answer does not significantly change the context or is incorrect, indicate this in your validation. In your thought process, clarify any assumptions made and how the context relates to the answer. Ensure that the final answer is presented in the format \\'Final Answer: Yes\\' or \\'Final Answer: No\\'. Provide your reasoning in the field and the final validated answer in the field.\\n\"\"\"\\n```', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:47:14.826\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.874 | Total tokens: 13949893 | Current cost: $0.001 | Current tokens: 5252\u001b[0m\n", "\u001b[32m2026-01-01 17:47:16.105\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.874 | Total tokens: 13950016 | Current cost: $0.000 | Current tokens: 123\u001b[0m\n", "\u001b[32m2026-01-01 17:47:19.079\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.874 | Total tokens: 13952056 | Current cost: $0.000 | Current tokens: 2040\u001b[0m\n", "{'name': 'log_reasoning483', 'description': 'Task to log_reasoning483. Takes question, validated_answer as input. ', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'Input parameter question for log_reasoning483', 'required': False}, {'name': 'validated_answer', 'type': 'str', 'description': 'Input parameter validated_answer for log_reasoning483', 'required': False}], 'outputs': [], 'prompt': 'Your are a task solver.', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:47:21.810\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.875 | Total tokens: 13957341 | Current cost: $0.001 | Current tokens: 5285\u001b[0m\n", "\u001b[32m2026-01-01 17:47:22.788\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.875 | Total tokens: 13957436 | Current cost: $0.000 | Current tokens: 95\u001b[0m\n", "\u001b[32m2026-01-01 17:47:25.135\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.875 | Total tokens: 13958091 | Current cost: $0.000 | Current tokens: 655\u001b[0m\n", "\u001b[32m2026-01-01 17:47:25.136\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1049\u001b[0m - \u001b[1mEvaluate the workflow at step 12 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:01<00:56, 1.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:01<00:43, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 
[00:03<00:47, 1.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:03<00:41, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:04<00:44, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:05<00:40, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:06<00:40, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:07<00:38, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:08<00:35, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:08<00:32, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:09<00:34, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:10<00:32, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:11<00:32, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:12<00:30, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:13<00:34, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:14<00:34, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:16<00:36, 1.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:17<00:34, 1.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:18<00:38, 1.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:19<00:33, 1.12s/it]" 
] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:20<00:33, 1.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:22<00:32, 1.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:22<00:27, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:23<00:24, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:24<00:21, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:24<00:20, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:26<00:21, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:27<00:24, 1.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:28<00:24, 1.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:29<00:21, 1.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:30<00:18, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:31<00:16, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:32<00:15, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:32<00:13, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:34<00:15, 1.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:35<00:15, 1.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
74%|███████▍ | 37/50 [00:36<00:13, 1.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:37<00:12, 1.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:38<00:11, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:39<00:10, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:40<00:08, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:41<00:07, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:42<00:06, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:42<00:05, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:43<00:04, 1.17it/s]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:44<00:03, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:45<00:02, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:46<00:01, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:47<00:01, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:49<00:00, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-01 17:48:14.162\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1052\u001b[0m - \u001b[1mStep 12 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.92}\u001b[0m\n", "randomly update dataset\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:48:16.936\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.881 | Total tokens: 13996839 | Current cost: $0.001 | Current tokens: 8022\u001b[0m\n", "- The workflow 
lacks a validation step after generating the answer, which could lead to unverified outputs being used in subsequent steps.\n", "- There is an inconsistency in the execution history where multiple questions resulted in incorrect solutions despite having the same structure and format, indicating potential issues with the underlying model or data interpretation.\n", "- The workflow does not account for potential ambiguities in the questions, which could lead to misinterpretation of the required answers.\n", "- The control flow does not include error handling for cases where the validation fails, which could result in unhandled exceptions or incorrect outputs being presented.\n", "- The assumption that all questions can be answered with a simple 'Yes' or 'No' may not hold true for all contexts, potentially oversimplifying complex biological scenarios.\n", "\u001b[32m2026-01-01 17:48:18.408\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.882 | Total tokens: 13997531 | Current cost: $0.000 | Current tokens: 692\u001b[0m\n", "```python\n", "steps = [\n", "{'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", "{'name': 'validate_answer2087', 'args': ['answer'], 'outputs': ['validated_answer']},\n", "{'name': 'contextualize_answer4593', 'args': ['validated_answer', 'question'], 'outputs': ['final_answer']}\n", "]\n", "```\n", "Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb DAD1 and monitor SNHG12 expression. Decide whether this perturbation leads to a significant alteration in SNHG12 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. 
For experiments performed in K562 cells, CARS is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SRP68 is perturbed and FCER1G expression is quantified. Does this perturbation result in a significant change in FCER1G expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of ARHGAP22, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of COPB1, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TTI1 is perturbed and the expression of RHCE is measured. Determine whether RHCE shows a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, SLC35B1 is perturbed and the expression of FCER1G is measured. Does this perturbation cause a significant change in FCER1G expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to TIMM23 and then measure expression of SH3BGRL3. Does this perturbation cause a significant change in SH3BGRL3 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, IARS2 is perturbed and the expression of PHGDH is measured. Determine whether PHGDH shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to CARS and then measure expression of FCGR2A. Does this perturbation cause a significant change in FCGR2A expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. 
For experiments performed in K562 cells, IER3IP1 is perturbed and CLCA1 expression is measured. Determine whether CLCA1 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TIMM23 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SYVN1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PPWD1 is perturbed and CLCA1 expression is observed. Does this perturbation lead to a significant difference in CLCA1 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of MARS, does the expression profile of FAM129A indicate a significant change relative to control conditions? 
Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SLC35B1 and examine the expression of RHCE. Does perturbing SLC35B1 lead to a significant change in RHCE expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, CHERP is perturbed and NPL expression is quantified. Does this perturbation result in a significant change in NPL expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SPCS3, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of SEL1L is associated with a significant change in TXNIP expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. 
In K562 cells, DNAJC19 is perturbed and PHGDH expression is quantified. Does this perturbation result in a significant change in PHGDH expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, BHLHE40 is perturbed and SESN2 expression is measured. Determine whether SESN2 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, STT3A is perturbed and the expression of FCER1G is measured. Determine whether FCER1G shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb MRGBP and examine the expression of RPS27. Does perturbing MRGBP lead to a significant change in RPS27 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, UFM1 is perturbed and the expression of CLCA1 is measured. Does this perturbation cause a significant change in CLCA1 expression? 
Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb HYOU1 and examine the expression of SNHG12. Does perturbing HYOU1 lead to a significant change in SNHG12 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which SRPRB is perturbed and PPCS expression is observed. Does this perturbation lead to a significant difference in PPCS expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SRP72 and examine the expression of RPS27. Does perturbing SRP72 lead to a significant change in RPS27 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of IER3IP1, does the expression profile of FCER1G indicate a significant change relative to control conditions? 
Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DDIT3 is perturbed and RPS27 expression is quantified. Does this perturbation result in a significant change in RPS27 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TTI1 is perturbed and the expression of CD52 is measured. Determine whether CD52 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, ZNF326 is perturbed and ZNF326 expression is measured. Determine whether ZNF326 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which BHLHE40 is perturbed and SESN2 expression is observed. Does this perturbation lead to a significant difference in SESN2 expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DDIT3 and examine the expression of RGS16. Does perturbing DDIT3 lead to a significant change in RGS16 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IDH3A is perturbed and SNHG12 expression is observed. Does this perturbation lead to a significant difference in SNHG12 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SEC63 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, NEDD8 is perturbed and FCGR2A expression is quantified. Does this perturbation result in a significant change in FCGR2A expression compared with control cells? 
Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to UFL1 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SEC63 is perturbed and CD52 expression is measured. Determine whether CD52 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, SLMO2 is perturbed and the expression of FCGR2A is measured. Does this perturbation cause a significant change in FCGR2A expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to MRGBP and then measure expression of RPS27. Does this perturbation cause a significant change in RPS27 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in regulatory genomics. 
Consider data from K562 cells in which PDIA6 is perturbed and YTHDF2 expression is observed. Does this perturbation lead to a significant difference in YTHDF2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb TTI1 and examine the expression of RHCE. Does perturbing TTI1 lead to a significant change in RHCE expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, KCTD16 is perturbed and the expression of SNHG12 is measured. Does this perturbation cause a significant change in SNHG12 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SRPRB is perturbed and the expression of PPCS is measured. Determine whether PPCS shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SYVN1 is perturbed and YTHDF2 expression is quantified. Does this perturbation result in a significant change in YTHDF2 expression compared with control cells? 
Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which ATP5B is perturbed and FCER1G expression is observed. Does this perturbation lead to a significant difference in FCER1G expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SAMM50 and examine the expression of FCGR2A. Does perturbing SAMM50 lead to a significant change in FCGR2A expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which AMIGO3 is perturbed and TXNIP expression is observed. Does this perturbation lead to a significant difference in TXNIP expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TMED2 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, HSD17B12 is perturbed and CD52 expression is measured. Determine whether CD52 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': '\"\"\"\\nUse the context from `{question}` to determine the best answer. If the answer is uncertain or conflicting, clearly indicate this. Ensure that the answer is validated against reliable sources for accuracy before proceeding. Provide your final answer in a clear format, without extra commentary or reasoning.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. 
Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:48:21.140\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.882 | Total tokens: 14002760 | Current cost: $0.001 | Current tokens: 5229\u001b[0m\n", "\u001b[32m2026-01-01 17:48:22.206\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.882 | Total tokens: 14002864 | Current cost: $0.000 | Current tokens: 104\u001b[0m\n", "\u001b[32m2026-01-01 17:48:23.498\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.883 | Total tokens: 14004096 | Current cost: $0.000 | Current tokens: 1232\u001b[0m\n", "{'name': 'validate_answer2087', 'description': 'Task to validate_answer2087. Takes answer as input. Produces validated_answer as output.', 'inputs': [{'name': 'answer', 'type': 'str', 'description': 'Input parameter answer for validate_answer2087', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_answer2087', 'required': True}], 'prompt': '\"\"\"\\nThink step by step to answer the question based on the context provided in {question}. First, validate the context to ensure accuracy and relevance. If the answer is uncertain or ambiguous, clearly state that uncertainty. After validation, generate a straightforward answer that directly addresses {question}, considering any relevant nuances. 
Format your output in XML, using to explain your reasoning and for the final response. Ensure that the answer is concise and reflects any significant changes accurately, avoiding oversimplification.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:48:25.890\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.884 | Total tokens: 14009378 | Current cost: $0.001 | Current tokens: 5282\u001b[0m\n", "\u001b[32m2026-01-01 17:48:26.846\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.884 | Total tokens: 14009478 | Current cost: $0.000 | Current tokens: 100\u001b[0m\n", "\u001b[32m2026-01-01 17:48:29.163\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.884 | Total tokens: 14011440 | Current cost: $0.000 | Current tokens: 1962\u001b[0m\n", "{'name': 'contextualize_answer4593', 'description': 'Task to contextualize_answer4593. Takes validated_answer, question as input. 
Produces final_answer as output.', 'inputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Input parameter validated_answer for contextualize_answer4593', 'required': False}, {'name': 'question', 'type': 'str', 'description': 'Input parameter question for contextualize_answer4593', 'required': False}], 'outputs': [{'name': 'final_answer', 'type': 'str', 'description': 'Output parameter final_answer from contextualize_answer4593', 'required': True}], 'prompt': '```xml\\n\"\"\"\\nTo answer the question accurately, first generate an answer based on the provided {question}. Validate the generated answer by ensuring it aligns with the context of the {question} and meets the criteria for a \"validated answer,\" which includes correctness and relevance. If the generated answer does not significantly change the context or is incorrect, indicate this in your validation. In your thought process, clarify any assumptions made and how the context relates to the answer. Ensure that the final answer is presented in the format \\'Final Answer: Yes\\' or \\'Final Answer: No\\'. Provide your reasoning in the field and the final validated answer in the field.\\n\"\"\"\\n```', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:48:32.112\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.885 | Total tokens: 14016730 | Current cost: $0.001 | Current tokens: 5290\u001b[0m\n", "\u001b[32m2026-01-01 17:48:33.260\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.885 | Total tokens: 14016830 | Current cost: $0.000 | Current tokens: 100\u001b[0m\n", "\u001b[32m2026-01-01 17:48:35.839\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.885 | Total tokens: 14018968 | Current cost: $0.000 | Current tokens: 2138\u001b[0m\n", "\u001b[32m2026-01-01 17:48:35.840\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1049\u001b[0m - \u001b[1mEvaluate the workflow at step 13 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:01<00:55, 1.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:01<00:42, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 
[00:02<00:45, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:03<00:41, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:04<00:45, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:05<00:40, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:06<00:41, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:07<00:41, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:09<00:47, 1.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:10<00:41, 1.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:10<00:36, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:11<00:34, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:12<00:32, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:13<00:31, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:14<00:30, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:14<00:29, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:16<00:33, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:17<00:38, 1.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:19<00:37, 1.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:20<00:34, 1.14s/it]" 
] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:21<00:30, 1.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:21<00:27, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:23<00:28, 1.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:23<00:25, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:26<00:37, 1.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:27<00:33, 1.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:28<00:28, 1.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:30<00:28, 1.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:31<00:25, 1.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:31<00:21, 1.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:32<00:18, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:33<00:17, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:34<00:18, 1.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:35<00:15, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:36<00:13, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:37<00:12, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
74%|███████▍ | 37/50 [00:38<00:11, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:38<00:09, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:39<00:09, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:40<00:08, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:41<00:07, 1.24it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:41<00:06, 1.24it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:43<00:06, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:44<00:05, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:44<00:04, 1.17it/s]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:45<00:03, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:46<00:02, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:47<00:01, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:48<00:00, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:48<00:00, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-01 17:49:24.682\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1052\u001b[0m - \u001b[1mStep 13 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.92}\u001b[0m\n", "randomly update dataset\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:49:27.648\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.891 | Total tokens: 14057615 | Current cost: $0.001 | Current tokens: 7977\u001b[0m\n", "- The workflow 
lacks a validation step after generating the answer, which could lead to incorrect outputs being used in subsequent steps.\n", "- There is no mechanism to handle or report errors in the validation process, which may result in unhandled cases or misleading outputs.\n", "- The workflow assumes that all generated answers are valid without any checks for consistency or correctness before proceeding to contextualization.\n", "- The prompt and intermediate steps do not specify how to handle cases where the answer is ambiguous or not clearly defined, leading to potential misinterpretation of the question.\n", "\u001b[32m2026-01-01 17:49:29.119\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.892 | Total tokens: 14058262 | Current cost: $0.000 | Current tokens: 647\u001b[0m\n", "```python\n", "steps = [\n", "{'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", "{'name': 'validate_answer2087', 'args': ['answer'], 'outputs': ['validated_answer']},\n", "{'name': 'contextualize_answer4593', 'args': ['validated_answer', 'question'], 'outputs': ['final_answer']}\n", "]\n", "```\n", "Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SEC63 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SAMM50 and examine the expression of FCGR2A. Does perturbing SAMM50 lead to a significant change in FCGR2A expression? 
Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to CARS and then measure expression of FCGR2A. Does this perturbation cause a significant change in FCGR2A expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, P4HB is perturbed and RHCE expression is quantified. Does this perturbation result in a significant change in RHCE expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of MARS, does the expression profile of FAM129A indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, CARS is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. 
In K562 cells, we introduce a perturbation to SYVN1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, KCTD16 is perturbed and the expression of SNHG12 is measured. Does this perturbation cause a significant change in SNHG12 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to MRGBP and then measure expression of RPS27. Does this perturbation cause a significant change in RPS27 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IER3IP1 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SPCS3, does the expression profile of CD52 indicate a significant change relative to control conditions? 
Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of HARS, does the expression profile of PHGDH indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb FARSB and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of COPZ1, does the expression profile of SH3BGRL3 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SEC63 is perturbed and CD52 expression is measured. Determine whether CD52 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. 
In K562 cells, SRP68 is perturbed and FCER1G expression is quantified. Does this perturbation result in a significant change in FCER1G expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, IER3IP1 is perturbed and CLCA1 expression is measured. Determine whether CLCA1 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, STT3A is perturbed and the expression of FCER1G is measured. Determine whether FCER1G shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb IER3IP1 and monitor FCER1G expression. Decide whether this perturbation leads to a significant alteration in FCER1G expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TIMM23 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb PSMD4 and monitor PSMD4 expression. Decide whether this perturbation leads to a significant alteration in PSMD4 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of ARHGAP22, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ASCC3 is associated with a significant change in SH3BGRL3 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DDIT3 is perturbed and RPS27 expression is quantified. Does this perturbation result in a significant change in RPS27 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene regulation studies. 
For experiments performed in K562 cells, PDIA6 is perturbed and SNHG12 expression is measured. Determine whether SNHG12 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, BHLHE40 is perturbed and SESN2 expression is measured. Determine whether SESN2 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb NEDD8 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which BHLHE40 is perturbed and SESN2 expression is observed. Does this perturbation lead to a significant difference in SESN2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SAMM50, does the expression profile of FCGR2A indicate a significant change relative to control conditions? 
Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TTI1 is perturbed and the expression of RHCE is measured. Determine whether RHCE shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, TMEM167A is perturbed and the expression of CD52 is measured. Does this perturbation cause a significant change in CD52 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SLMO2 and then measure expression of PHGDH. Does this perturbation cause a significant change in PHGDH expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, IARS2 is perturbed and the expression of PHGDH is measured. Determine whether PHGDH shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. 
Consider data from K562 cells in which SRPRB is perturbed and PPCS expression is observed. Does this perturbation lead to a significant difference in PPCS expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DAD1 and examine the expression of CLCA1. Does perturbing DAD1 lead to a significant change in CLCA1 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PPWD1 is perturbed and CLCA1 expression is observed. Does this perturbation lead to a significant difference in CLCA1 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to UFL1 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of PPWD1 is associated with a significant change in CD52 expression compared with unperturbed controls. 
Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which ATP5B is perturbed and FCER1G expression is observed. Does this perturbation lead to a significant difference in FCER1G expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, NEDD8 is perturbed and FCGR2A expression is quantified. Does this perturbation result in a significant change in FCGR2A expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, HSD17B12 is perturbed and FCGR2A expression is quantified. Does this perturbation result in a significant change in FCGR2A expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SRPRB is perturbed and the expression of PPCS is measured. Determine whether PPCS shows a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, CHERP is perturbed and NPL expression is quantified. Does this perturbation result in a significant change in NPL expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, EIF2B2 is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SRP72 and examine the expression of RPS27. Does perturbing SRP72 lead to a significant change in RPS27 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of SEL1L is associated with a significant change in TXNIP expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. 
For experiments carried out in K562 cells, we perturb HYOU1 and examine the expression of SNHG12. Does perturbing HYOU1 lead to a significant change in SNHG12 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DNAJC19 is perturbed and PHGDH expression is quantified. Does this perturbation result in a significant change in PHGDH expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb MRGBP and examine the expression of RPS27. Does perturbing MRGBP lead to a significant change in RPS27 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb TTI1 and examine the expression of RHCE. Does perturbing TTI1 lead to a significant change in RHCE expression? 
Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': '\"\"\"\\nUse the context from `{question}` to determine the best answer. If the answer is uncertain or conflicting, clearly indicate this. Ensure that the answer is validated against reliable sources for accuracy before proceeding. Provide your final answer in a clear format, without extra commentary or reasoning.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:49:31.566\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.892 | Total tokens: 14063432 | Current cost: $0.001 | Current tokens: 5170\u001b[0m\n", "\u001b[32m2026-01-01 17:49:33.015\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.892 | Total tokens: 14063554 | Current cost: $0.000 | Current tokens: 122\u001b[0m\n", "\u001b[32m2026-01-01 17:49:34.588\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.893 | Total tokens: 14064867 | Current cost: $0.000 | Current tokens: 1313\u001b[0m\n", "{'name': 'validate_answer2087', 'description': 'Task to validate_answer2087. Takes answer as input. Produces validated_answer as output.', 'inputs': [{'name': 'answer', 'type': 'str', 'description': 'Input parameter answer for validate_answer2087', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_answer2087', 'required': True}], 'prompt': '\"\"\"\\nThink step by step to answer the question based on the context provided in {question}. First, validate the context to ensure accuracy and relevance. If the answer is uncertain or ambiguous, clearly state that uncertainty. After validation, generate a straightforward answer that directly addresses {question}, considering any relevant nuances. Format your output in XML, using to explain your reasoning and for the final response. Ensure that the answer is concise and reflects any significant changes accurately, avoiding oversimplification.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:49:37.320\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.893 | Total tokens: 14070098 | Current cost: $0.001 | Current tokens: 5231\u001b[0m\n", "\u001b[32m2026-01-01 17:49:38.139\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.894 | Total tokens: 14070185 | Current cost: $0.000 | Current tokens: 87\u001b[0m\n", "\u001b[32m2026-01-01 17:49:41.047\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.894 | Total tokens: 14072255 | Current cost: $0.000 | Current tokens: 2070\u001b[0m\n", "{'name': 'contextualize_answer4593', 'description': 'Task to contextualize_answer4593. Takes validated_answer, question as input. Produces final_answer as output.', 'inputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Input parameter validated_answer for contextualize_answer4593', 'required': False}, {'name': 'question', 'type': 'str', 'description': 'Input parameter question for contextualize_answer4593', 'required': False}], 'outputs': [{'name': 'final_answer', 'type': 'str', 'description': 'Output parameter final_answer from contextualize_answer4593', 'required': True}], 'prompt': '```xml\\n\"\"\"\\nTo answer the question accurately, first generate an answer based on the provided {question}. 
Validate the generated answer by ensuring it aligns with the context of the {question} and meets the criteria for a \"validated answer,\" which includes correctness and relevance. If the generated answer does not significantly change the context or is incorrect, indicate this in your validation. In your thought process, clarify any assumptions made and how the context relates to the answer. Ensure that the final answer is presented in the format \\'Final Answer: Yes\\' or \\'Final Answer: No\\'. Provide your reasoning in the field and the final validated answer in the field.\\n\"\"\"\\n```', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:49:43.738\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.895 | Total tokens: 14077492 | Current cost: $0.001 | Current tokens: 5237\u001b[0m\n", "\u001b[32m2026-01-01 17:49:44.618\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.895 | Total tokens: 14077589 | Current cost: $0.000 | Current tokens: 97\u001b[0m\n", "\u001b[32m2026-01-01 17:49:47.064\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.895 | Total tokens: 14079873 | Current cost: $0.000 | Current tokens: 2284\u001b[0m\n", 
"\u001b[32m2026-01-01 17:49:47.065\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1049\u001b[0m - \u001b[1mEvaluate the workflow at step 14 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:01<01:06, 1.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:02<00:49, 1.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 [00:02<00:40, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:03<00:40, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:04<00:35, 1.28it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:05<00:37, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:06<00:36, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:07<00:37, 1.13it/s]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:07<00:35, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:09<00:38, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:09<00:35, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:10<00:35, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:11<00:32, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:12<00:33, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:13<00:31, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:14<00:32, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:16<00:39, 1.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:17<00:40, 1.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:19<00:43, 1.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:20<00:38, 1.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:21<00:35, 1.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:22<00:30, 1.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:23<00:30, 1.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:24<00:26, 1.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:25<00:25, 1.03s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:26<00:22, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:26<00:20, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:27<00:18, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:28<00:17, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:29<00:18, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:30<00:16, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:31<00:15, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:31<00:13, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": 
"stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:32<00:14, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:34<00:14, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:35<00:13, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 37/50 [00:36<00:12, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:36<00:10, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:37<00:10, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:38<00:09, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:39<00:08, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 
42/50 [00:40<00:06, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:40<00:05, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:41<00:05, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:42<00:04, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:43<00:03, 1.24it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:44<00:02, 1.30it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:45<00:01, 1.25it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:45<00:00, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:46<00:00, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-01 17:50:33.676\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1052\u001b[0m - \u001b[1mStep 14 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.9}\u001b[0m\n", "randomly update dataset\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:50:37.715\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.901 | Total tokens: 14118617 | Current cost: $0.001 | Current tokens: 8049\u001b[0m\n", "- The workflow lacks a validation step for the initial question input before generating an answer, which could lead to incorrect assumptions being made.\n", "- The validation step for the answer does not specify the criteria for validation, leading to potential inconsistencies in what is considered a \"validated answer.\"\n", "- There is no error handling or fallback mechanism in case the answer validation fails, which could result in unhandled cases or premature termination of the workflow.\n", "- The workflow assumes that all questions can be answered with a simple 'Yes' or 'No' without considering the complexity of the underlying data, which may not always be appropriate.\n", "- The execution history shows multiple instances where the predicted answers were incorrect, indicating a flaw in the answer generation process that is not addressed in the workflow.\n", "- The workflow does not account for the possibility of conflicting results from the validation step, which could lead to ambiguity in the final answer.\n", "\u001b[32m2026-01-01 17:50:40.443\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.902 | 
Total tokens: 14119339 | Current cost: $0.000 | Current tokens: 722\u001b[0m\n", "```python\n", "steps = [\n", " {'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", " {'name': 'validate_answer2087', 'args': ['answer'], 'outputs': ['validated_answer']},\n", " {'name': 'contextualize_answer4593', 'args': ['validated_answer', 'question'], 'outputs': ['final_answer']}\n", "]\n", "```\n", "Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PPWD1 is perturbed and CLCA1 expression is observed. Does this perturbation lead to a significant difference in CLCA1 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ASCC3 is associated with a significant change in SH3BGRL3 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which AMIGO3 is perturbed and TXNIP expression is observed. Does this perturbation lead to a significant difference in TXNIP expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, STT3A is perturbed and the expression of FCER1G is measured. Determine whether FCER1G shows a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SLMO2 and then measure expression of PHGDH. Does this perturbation cause a significant change in PHGDH expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, SLC35B1 is perturbed and the expression of FCER1G is measured. Does this perturbation cause a significant change in FCER1G expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb SAMM50 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to DERL2 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb PSMD4 and monitor PSMD4 expression. 
Decide whether this perturbation leads to a significant alteration in PSMD4 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, HSD17B12 is perturbed and CD52 expression is measured. Determine whether CD52 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of TELO2, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, UFM1 is perturbed and the expression of CLCA1 is measured. Does this perturbation cause a significant change in CLCA1 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which SRPRB is perturbed and PPCS expression is observed. Does this perturbation lead to a significant difference in PPCS expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TIMM23 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, ARHGAP22 is perturbed and the expression of SH3BGRL3 is measured. Does this perturbation cause a significant change in SH3BGRL3 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, IARS2 is perturbed and the expression of PHGDH is measured. Determine whether PHGDH shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TMED2 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SCYL1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, IER3IP1 is perturbed and CLCA1 expression is measured. Determine whether CLCA1 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SPCS2 is perturbed and the expression of SH3BGRL3 is measured. Determine whether SH3BGRL3 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to TIMM23 and then measure expression of SH3BGRL3. Does this perturbation cause a significant change in SH3BGRL3 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. 
For K562 cells subjected to perturbation of ARHGAP22, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of IER3IP1, does the expression profile of FCER1G indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to MRGBP and then measure expression of RPS27. Does this perturbation cause a significant change in RPS27 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SRP68 is perturbed and FCER1G expression is quantified. Does this perturbation result in a significant change in FCER1G expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb NEDD8 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. 
Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of HARS, does the expression profile of PHGDH indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IER3IP1 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb IER3IP1 and monitor FCER1G expression. Decide whether this perturbation leads to a significant alteration in FCER1G expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to UFL1 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. 
In K562 cells, TMEM167A is perturbed and the expression of CD52 is measured. Does this perturbation cause a significant change in CD52 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, EIF2B2 is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of PPWD1 is associated with a significant change in CD52 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of SEL1L is associated with a significant change in TXNIP expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SYVN1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? 
Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, BHLHE40 is perturbed and SESN2 expression is measured. Determine whether SESN2 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SRP72 and examine the expression of RPS27. Does perturbing SRP72 lead to a significant change in RPS27 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb MRGBP and examine the expression of RPS27. Does perturbing MRGBP lead to a significant change in RPS27 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DNAJC19 is perturbed and PHGDH expression is quantified. Does this perturbation result in a significant change in PHGDH expression compared with control cells? 
Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of GMPPB, does the expression profile of RHCE indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SAMM50 and examine the expression of FCGR2A. Does perturbing SAMM50 lead to a significant change in FCGR2A expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb FARSB and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SEC63 is perturbed and CD52 expression is measured. Determine whether CD52 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. 
Consider data from K562 cells in which ATP5B is perturbed and FCER1G expression is observed. Does this perturbation lead to a significant difference in FCER1G expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DDIT3 is perturbed and RPS27 expression is quantified. Does this perturbation result in a significant change in RPS27 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TTI1 is perturbed and the expression of CD52 is measured. Determine whether CD52 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to CARS and then measure expression of FCGR2A. Does this perturbation cause a significant change in FCGR2A expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of COPB1, does the expression profile of CD52 indicate a significant change relative to control conditions? 
Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SOCS1 is perturbed and RPS27 expression is measured. Determine whether RPS27 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, P4HB is perturbed and RHCE expression is quantified. Does this perturbation result in a significant change in RHCE expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': '\"\"\"\\nUse the context from `{question}` to determine the best answer. If the answer is uncertain or conflicting, clearly indicate this. Ensure that the answer is validated against reliable sources for accuracy before proceeding. Provide your final answer in a clear format, without extra commentary or reasoning.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. 
Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:50:43.915\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.902 | Total tokens: 14124569 | Current cost: $0.001 | Current tokens: 5230\u001b[0m\n", "\u001b[32m2026-01-01 17:50:47.476\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.903 | Total tokens: 14124820 | Current cost: $0.000 | Current tokens: 251\u001b[0m\n", "\u001b[32m2026-01-01 17:50:48.862\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.903 | Total tokens: 14126359 | Current cost: $0.000 | Current tokens: 1539\u001b[0m\n", "{'name': 'validate_answer2087', 'description': 'Task to validate_answer2087. Takes answer as input. Produces validated_answer as output.', 'inputs': [{'name': 'answer', 'type': 'str', 'description': 'Input parameter answer for validate_answer2087', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_answer2087', 'required': True}], 'prompt': '\"\"\"\\nThink step by step to answer the question based on the context provided in {question}. First, validate the context to ensure accuracy and relevance. If the answer is uncertain or ambiguous, clearly state that uncertainty. After validation, generate a straightforward answer that directly addresses {question}, considering any relevant nuances. 
Format your output in XML, using to explain your reasoning and for the final response. Ensure that the answer is concise and reflects any significant changes accurately, avoiding oversimplification.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:50:51.237\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.904 | Total tokens: 14131605 | Current cost: $0.001 | Current tokens: 5246\u001b[0m\n", "\u001b[32m2026-01-01 17:50:52.023\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.904 | Total tokens: 14131700 | Current cost: $0.000 | Current tokens: 95\u001b[0m\n", "\u001b[32m2026-01-01 17:50:55.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.904 | Total tokens: 14133880 | Current cost: $0.000 | Current tokens: 2180\u001b[0m\n", "{'name': 'contextualize_answer4593', 'description': 'Task to contextualize_answer4593. Takes validated_answer, question as input. 
Produces final_answer as output.', 'inputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Input parameter validated_answer for contextualize_answer4593', 'required': False}, {'name': 'question', 'type': 'str', 'description': 'Input parameter question for contextualize_answer4593', 'required': False}], 'outputs': [{'name': 'final_answer', 'type': 'str', 'description': 'Output parameter final_answer from contextualize_answer4593', 'required': True}], 'prompt': '```xml\\n\"\"\"\\nTo answer the question accurately, first generate an answer based on the provided {question}. Validate the generated answer by ensuring it aligns with the context of the {question} and meets the criteria for a \"validated answer,\" which includes correctness and relevance. If the generated answer does not significantly change the context or is incorrect, indicate this in your validation. In your thought process, clarify any assumptions made and how the context relates to the answer. Ensure that the final answer is presented in the format \\'Final Answer: Yes\\' or \\'Final Answer: No\\'. Provide your reasoning in the field and the final validated answer in the field.\\n\"\"\"\\n```', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:50:58.000\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.905 | Total tokens: 14139142 | Current cost: $0.001 | Current tokens: 5262\u001b[0m\n", "\u001b[32m2026-01-01 17:50:59.529\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.905 | Total tokens: 14139237 | Current cost: $0.000 | Current tokens: 95\u001b[0m\n", "\u001b[32m2026-01-01 17:51:01.961\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.905 | Total tokens: 14141646 | Current cost: $0.000 | Current tokens: 2409\u001b[0m\n", "\u001b[32m2026-01-01 17:51:01.962\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1049\u001b[0m - \u001b[1mEvaluate the workflow at step 15 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:00<00:39, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:01<00:39, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 
[00:02<00:38, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:03<00:38, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:04<00:35, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:05<00:37, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:06<00:41, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:07<00:44, 1.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:08<00:39, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:09<00:37, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:10<00:37, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:11<00:37, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:11<00:33, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:13<00:36, 1.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:14<00:34, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:14<00:31, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:15<00:30, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:16<00:28, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:17<00:27, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:18<00:29, 1.01it/s]" 
] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:19<00:27, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:20<00:25, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:21<00:26, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:22<00:24, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:23<00:23, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:24<00:24, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:26<00:27, 1.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:26<00:23, 1.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:27<00:20, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:28<00:17, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:29<00:17, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:31<00:21, 1.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:31<00:18, 1.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:32<00:15, 1.00it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:33<00:14, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:34<00:12, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
74%|███████▍ | 37/50 [00:35<00:12, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:36<00:11, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:37<00:09, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:38<00:09, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:39<00:08, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:39<00:07, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:40<00:06, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:42<00:06, 1.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:43<00:05, 1.03s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:44<00:03, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:44<00:02, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:45<00:01, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:46<00:00, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:47<00:00, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-01 17:51:49.847\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1052\u001b[0m - \u001b[1mStep 15 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.88}\u001b[0m\n", "randomly update dataset\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:51:53.575\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.912 | Total tokens: 14180373 | Current cost: $0.001 | Current tokens: 8036\u001b[0m\n", "- The workflow 
lacks a step to handle cases where the answer is not straightforward or requires additional context, leading to potential oversights in complex scenarios.\n", "- There is no validation step to ensure that the generated answer aligns with the expected format ('Final Answer: Yes' or 'Final Answer: No'), which could result in format inconsistencies.\n", "- The workflow does not include error handling for cases where the validation of the answer fails, leading to unaddressed discrepancies in the final output.\n", "- The assumption that all questions can be answered with a simple 'Yes' or 'No' may not hold true for all perturbation experiments, indicating a lack of flexibility in the workflow.\n", "- The execution history shows multiple instances where the predicted answers were correct, but the solutions were marked incorrect, suggesting a flaw in the validation or scoring mechanism.\n", "\u001b[32m2026-01-01 17:51:54.880\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.912 | Total tokens: 14181079 | Current cost: $0.000 | Current tokens: 706\u001b[0m\n", "```python\n", "steps = [\n", "{'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", "{'name': 'contextualize_answer4593', 'args': ['answer', 'question'], 'outputs': ['final_answer']},\n", "{'name': 'validate_answer2087', 'args': ['final_answer'], 'outputs': ['validated_answer']}\n", "]\n", "```\n", "Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PDIA6 is perturbed and YTHDF2 expression is observed. Does this perturbation lead to a significant difference in YTHDF2 expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of ARHGAP22, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to TIMM23 and then measure expression of SH3BGRL3. Does this perturbation cause a significant change in SH3BGRL3 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, PDIA6 is perturbed and SNHG12 expression is measured. Determine whether SNHG12 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb IER3IP1 and monitor FCER1G expression. Decide whether this perturbation leads to a significant alteration in FCER1G expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. 
Consider data from K562 cells in which TIMM23 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SRP68 and then measure expression of PPCS. Does this perturbation cause a significant change in PPCS expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, IARS2 is perturbed and the expression of PHGDH is measured. Determine whether PHGDH shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, KCTD16 is perturbed and the expression of SNHG12 is measured. Does this perturbation cause a significant change in SNHG12 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TTI1 is perturbed and the expression of CD52 is measured. Determine whether CD52 shows a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, UFM1 is perturbed and the expression of CLCA1 is measured. Does this perturbation cause a significant change in CLCA1 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, STT3A is perturbed and the expression of FCER1G is measured. Determine whether FCER1G shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb NEDD8 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of SEL1L is associated with a significant change in TXNIP expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. 
Consider data from K562 cells in which IER3IP1 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DDIT3 is perturbed and RPS27 expression is quantified. Does this perturbation result in a significant change in RPS27 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ASCC3 is associated with a significant change in SH3BGRL3 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb FARSB and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SRP72 and examine the expression of RPS27. Does perturbing SRP72 lead to a significant change in RPS27 expression? 
Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to MRGBP and then measure expression of RPS27. Does this perturbation cause a significant change in RPS27 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PPWD1 is perturbed and CLCA1 expression is observed. Does this perturbation lead to a significant difference in CLCA1 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DDIT3 and examine the expression of RGS16. Does perturbing DDIT3 lead to a significant change in RGS16 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of GMPPB, does the expression profile of RHCE indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. 
Consider data from K562 cells in which IDH3A is perturbed and SNHG12 expression is observed. Does this perturbation lead to a significant difference in SNHG12 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TIMM23 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which ATP5B is perturbed and FCER1G expression is observed. Does this perturbation lead to a significant difference in FCER1G expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of IER3IP1, does the expression profile of FCER1G indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TTI1 is perturbed and the expression of RHCE is measured. Determine whether RHCE shows a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SEC63 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SOCS1 is perturbed and RPS27 expression is measured. Determine whether RPS27 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, TMEM167A is perturbed and the expression of CD52 is measured. Does this perturbation cause a significant change in CD52 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, NEDD8 is perturbed and FCGR2A expression is quantified. Does this perturbation result in a significant change in FCGR2A expression compared with control cells? 
Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which AMIGO3 is perturbed and TXNIP expression is observed. Does this perturbation lead to a significant difference in TXNIP expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TMED2 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DNAJC19 is perturbed and PHGDH expression is quantified. Does this perturbation result in a significant change in PHGDH expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SLMO2 and then measure expression of PHGDH. Does this perturbation cause a significant change in PHGDH expression? 
Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb PSMD4 and monitor PSMD4 expression. Decide whether this perturbation leads to a significant alteration in PSMD4 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of COPZ1, does the expression profile of SH3BGRL3 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, P4HB is perturbed and RHCE expression is quantified. Does this perturbation result in a significant change in RHCE expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, CHERP is perturbed and NPL expression is quantified. Does this perturbation result in a significant change in NPL expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. 
In K562 cells, we introduce a perturbation to SCYL1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of PPWD1 is associated with a significant change in CD52 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: No\n", "Solutions: Yes\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SAMM50, does the expression profile of FCGR2A indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which BHLHE40 is perturbed and SESN2 expression is observed. Does this perturbation lead to a significant difference in SESN2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SLC35B1 and examine the expression of RHCE. Does perturbing SLC35B1 lead to a significant change in RHCE expression? 
Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb MRGBP and examine the expression of RPS27. Does perturbing MRGBP lead to a significant change in RPS27 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, ZNF326 is perturbed and ZNF326 expression is measured. Determine whether ZNF326 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, HSD17B12 is perturbed and CD52 expression is measured. Determine whether CD52 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of HARS, does the expression profile of PHGDH indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. 
In K562 cells, SLMO2 is perturbed and the expression of FCGR2A is measured. Does this perturbation cause a significant change in FCGR2A expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': '\"\"\"\\nUse the context from `{question}` to determine the best answer. If the answer is uncertain or conflicting, clearly indicate this. Ensure that the answer is validated against reliable sources for accuracy before proceeding. Provide your final answer in a clear format, without extra commentary or reasoning.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. 
Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:51:58.332\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.913 | Total tokens: 14186298 | Current cost: $0.001 | Current tokens: 5219\u001b[0m\n", "\u001b[32m2026-01-01 17:52:00.056\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.913 | Total tokens: 14186401 | Current cost: $0.000 | Current tokens: 103\u001b[0m\n", "\u001b[32m2026-01-01 17:52:01.923\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.913 | Total tokens: 14187841 | Current cost: $0.000 | Current tokens: 1440\u001b[0m\n", "{'name': 'contextualize_answer4593', 'description': 'Task to contextualize_answer4593. Takes validated_answer, question as input. Produces final_answer as output.', 'inputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Input parameter validated_answer for contextualize_answer4593', 'required': False}, {'name': 'question', 'type': 'str', 'description': 'Input parameter question for contextualize_answer4593', 'required': False}], 'outputs': [{'name': 'final_answer', 'type': 'str', 'description': 'Output parameter final_answer from contextualize_answer4593', 'required': True}], 'prompt': '```xml\\n\"\"\"\\nTo answer the question accurately, first generate an answer based on the provided {question}. 
Validate the generated answer by ensuring it aligns with the context of the {question} and meets the criteria for a \"validated answer,\" which includes correctness and relevance. If the generated answer does not significantly change the context or is incorrect, indicate this in your validation. In your thought process, clarify any assumptions made and how the context relates to the answer. Ensure that the final answer is presented in the format \\'Final Answer: Yes\\' or \\'Final Answer: No\\'. Provide your reasoning in the field and the final validated answer in the field.\\n\"\"\"\\n```', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:52:04.376\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.914 | Total tokens: 14193092 | Current cost: $0.001 | Current tokens: 5251\u001b[0m\n", "\u001b[32m2026-01-01 17:52:05.230\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.914 | Total tokens: 14193187 | Current cost: $0.000 | Current tokens: 95\u001b[0m\n", "\u001b[32m2026-01-01 17:52:07.762\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.914 | Total tokens: 14195716 | Current cost: $0.000 | Current tokens: 2529\u001b[0m\n", "{'name': 
'validate_answer2087', 'description': 'Task to validate_answer2087. Takes answer as input. Produces validated_answer as output.', 'inputs': [{'name': 'answer', 'type': 'str', 'description': 'Input parameter answer for validate_answer2087', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_answer2087', 'required': True}], 'prompt': '\"\"\"\\nThink step by step to answer the question based on the context provided in {question}. First, validate the context to ensure accuracy and relevance. If the answer is uncertain or ambiguous, clearly state that uncertainty. After validation, generate a straightforward answer that directly addresses {question}, considering any relevant nuances. Format your output in XML, using to explain your reasoning and for the final response. Ensure that the answer is concise and reflects any significant changes accurately, avoiding oversimplification.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:52:10.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.915 | Total tokens: 14200974 | Current cost: $0.001 | Current tokens: 5258\u001b[0m\n", "\u001b[32m2026-01-01 17:52:10.951\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.915 | Total tokens: 14201073 | Current cost: $0.000 | Current tokens: 99\u001b[0m\n", "\u001b[32m2026-01-01 17:52:13.823\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.915 | Total tokens: 14203385 | Current cost: $0.000 | Current tokens: 2312\u001b[0m\n", "\u001b[32m2026-01-01 17:52:13.824\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1049\u001b[0m - \u001b[1mEvaluate the workflow at step 16 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:01<00:54, 1.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:02<01:00, 1.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 
[00:03<00:59, 1.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:04<00:49, 1.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:05<00:44, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:06<00:50, 1.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:07<00:46, 1.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:08<00:43, 1.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:10<00:46, 1.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:11<00:49, 1.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:12<00:42, 1.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:13<00:42, 1.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:14<00:39, 1.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:15<00:34, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:16<00:33, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:16<00:31, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:17<00:32, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:18<00:30, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:19<00:29, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:20<00:27, 1.10it/s]" 
] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:21<00:24, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:22<00:23, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:23<00:23, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:24<00:24, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:25<00:23, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:26<00:21, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:26<00:20, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:27<00:21, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:28<00:18, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:29<00:17, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:30<00:16, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:31<00:18, 1.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:32<00:16, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:33<00:14, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:34<00:15, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:35<00:13, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
74%|███████▍ | 37/50 [00:36<00:11, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:36<00:10, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:37<00:09, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:38<00:08, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:39<00:07, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:40<00:06, 1.25it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:41<00:06, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:42<00:06, 1.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:44<00:05, 1.15s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:45<00:04, 1.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:46<00:03, 1.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:47<00:02, 1.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:48<00:01, 1.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:49<00:00, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-01 17:53:02.994\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1052\u001b[0m - \u001b[1mStep 16 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.94}\u001b[0m\n", "randomly update dataset\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:53:05.982\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.922 | Total tokens: 14242159 | Current cost: $0.001 | Current tokens: 8030\u001b[0m\n", "- The workflow 
lacks a validation step to ensure that the initial predictions align with the expected format of 'Final Answer: Yes' or 'Final Answer: No', which could lead to incorrect outputs being accepted as valid.\n", "- There are multiple instances of incorrect computation results leading to discrepancies between predictions and solutions, indicating a failure in the validation or computation process.\n", "- The workflow does not include any error handling or mechanisms to address cases where the predictions do not match the solutions, which could result in unhandled exceptions or misleading outputs.\n", "- The prompt structure for questions is overly repetitive and does not account for variations in phrasing or context, potentially leading to ambiguity in interpretation.\n", "- The control flow does not account for the possibility of conflicting results from different questions, which could undermine the reliability of the overall workflow.\n", "\u001b[32m2026-01-01 17:53:07.363\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.922 | Total tokens: 14242862 | Current cost: $0.000 | Current tokens: 703\u001b[0m\n", "```python\n", "steps = [\n", " {'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", " {'name': 'validate_answer2087', 'args': ['answer'], 'outputs': ['validated_answer']},\n", " {'name': 'contextualize_answer4593', 'args': ['validated_answer', 'question'], 'outputs': ['final_answer']}\n", "]\n", "```\n", "Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, PDIA6 is perturbed and SNHG12 expression is measured. Determine whether SNHG12 exhibits a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which AMIGO3 is perturbed and TXNIP expression is observed. Does this perturbation lead to a significant difference in TXNIP expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SRP68 and then measure expression of PPCS. Does this perturbation cause a significant change in PPCS expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SAMM50 and examine the expression of FCGR2A. Does perturbing SAMM50 lead to a significant change in FCGR2A expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SYVN1 is perturbed and YTHDF2 expression is quantified. Does this perturbation result in a significant change in YTHDF2 expression compared with control cells? 
Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to TIMM23 and then measure expression of SH3BGRL3. Does this perturbation cause a significant change in SH3BGRL3 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SPCS3, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ASCC3 is associated with a significant change in SH3BGRL3 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IER3IP1 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. 
For K562 cells subjected to perturbation of IER3IP1, does the expression profile of FCER1G indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb MRGBP and examine the expression of RPS27. Does perturbing MRGBP lead to a significant change in RPS27 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, SLMO2 is perturbed and the expression of FCGR2A is measured. Does this perturbation cause a significant change in FCGR2A expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to CARS and then measure expression of FCGR2A. Does this perturbation cause a significant change in FCGR2A expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of PPWD1 is associated with a significant change in CD52 expression compared with unperturbed controls. 
Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, HSD17B12 is perturbed and CD52 expression is measured. Determine whether CD52 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which BHLHE40 is perturbed and SESN2 expression is observed. Does this perturbation lead to a significant difference in SESN2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb SAMM50 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SYVN1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. 
In K562 cells, DNAJC19 is perturbed and PHGDH expression is quantified. Does this perturbation result in a significant change in PHGDH expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SPCS2 is perturbed and the expression of SH3BGRL3 is measured. Determine whether SH3BGRL3 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, ZNF326 is perturbed and ZNF326 expression is measured. Determine whether ZNF326 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TTI1 is perturbed and the expression of CD52 is measured. Determine whether CD52 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, EIF2B2 is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, UFM1 is perturbed and the expression of CLCA1 is measured. Does this perturbation cause a significant change in CLCA1 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SEC63 is perturbed and CD52 expression is measured. Determine whether CD52 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, SLC35B1 is perturbed and the expression of FCER1G is measured. Does this perturbation cause a significant change in FCER1G expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IDH3A is perturbed and SNHG12 expression is observed. Does this perturbation lead to a significant difference in SNHG12 expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, CHERP is perturbed and NPL expression is quantified. Does this perturbation result in a significant change in NPL expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, ARHGAP22 is perturbed and the expression of SH3BGRL3 is measured. Does this perturbation cause a significant change in SH3BGRL3 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, P4HB is perturbed and RHCE expression is quantified. Does this perturbation result in a significant change in RHCE expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SLC35B1 and examine the expression of RHCE. Does perturbing SLC35B1 lead to a significant change in RHCE expression? 
Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, BHLHE40 is perturbed and SESN2 expression is measured. Determine whether SESN2 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, KCTD16 is perturbed and the expression of SNHG12 is measured. Does this perturbation cause a significant change in SNHG12 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TMED2 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb TTI1 and examine the expression of RHCE. Does perturbing TTI1 lead to a significant change in RHCE expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. 
In K562 cells, we perturb PSMD4 and monitor PSMD4 expression. Decide whether this perturbation leads to a significant alteration in PSMD4 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SOCS1 is perturbed and RPS27 expression is measured. Determine whether RPS27 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, GBF1 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SLMO2 and then measure expression of PHGDH. Does this perturbation cause a significant change in PHGDH expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TIMM23 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, IARS2 is perturbed and the expression of PHGDH is measured. Determine whether PHGDH shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to DERL2 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of HARS, does the expression profile of PHGDH indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SRP68 is perturbed and FCER1G expression is quantified. Does this perturbation result in a significant change in FCER1G expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. 
For K562 cells subjected to perturbation of SAMM50, does the expression profile of FCGR2A indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to UFL1 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, TMEM167A is perturbed and the expression of CD52 is measured. Does this perturbation cause a significant change in CD52 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to MRGBP and then measure expression of RPS27. Does this perturbation cause a significant change in RPS27 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DAD1 and examine the expression of CLCA1. Does perturbing DAD1 lead to a significant change in CLCA1 expression? 
Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, HSD17B12 is perturbed and FCGR2A expression is quantified. Does this perturbation result in a significant change in FCGR2A expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': '\"\"\"\\nUse the context from `{question}` to determine the best answer. If the answer is uncertain or conflicting, clearly indicate this. Ensure that the answer is validated against reliable sources for accuracy before proceeding. Provide your final answer in a clear format, without extra commentary or reasoning.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. 
Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:53:10.750\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.923 | Total tokens: 14248104 | Current cost: $0.001 | Current tokens: 5242\u001b[0m\n", "\u001b[32m2026-01-01 17:53:12.166\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.923 | Total tokens: 14248199 | Current cost: $0.000 | Current tokens: 95\u001b[0m\n", "\u001b[32m2026-01-01 17:53:13.535\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.923 | Total tokens: 14249673 | Current cost: $0.000 | Current tokens: 1474\u001b[0m\n", "{'name': 'validate_answer2087', 'description': 'Task to validate_answer2087. Takes answer as input. Produces validated_answer as output.', 'inputs': [{'name': 'answer', 'type': 'str', 'description': 'Input parameter answer for validate_answer2087', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_answer2087', 'required': True}], 'prompt': '\"\"\"\\nThink step by step to answer the question based on the context provided in {question}. First, validate the context to ensure accuracy and relevance. If the answer is uncertain or ambiguous, clearly state that uncertainty. After validation, generate a straightforward answer that directly addresses {question}, considering any relevant nuances. 
Format your output in XML, using to explain your reasoning and for the final response. Ensure that the answer is concise and reflects any significant changes accurately, avoiding oversimplification.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:53:16.542\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.924 | Total tokens: 14254982 | Current cost: $0.001 | Current tokens: 5309\u001b[0m\n", "\u001b[32m2026-01-01 17:53:17.588\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.924 | Total tokens: 14255086 | Current cost: $0.000 | Current tokens: 104\u001b[0m\n", "\u001b[32m2026-01-01 17:53:19.409\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.924 | Total tokens: 14257529 | Current cost: $0.000 | Current tokens: 2443\u001b[0m\n", "{'name': 'contextualize_answer4593', 'description': 'Task to contextualize_answer4593. Takes validated_answer, question as input. 
Produces final_answer as output.', 'inputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Input parameter validated_answer for contextualize_answer4593', 'required': False}, {'name': 'question', 'type': 'str', 'description': 'Input parameter question for contextualize_answer4593', 'required': False}], 'outputs': [{'name': 'final_answer', 'type': 'str', 'description': 'Output parameter final_answer from contextualize_answer4593', 'required': True}], 'prompt': '```xml\\n\"\"\"\\nTo answer the question accurately, first generate an answer based on the provided {question}. Validate the generated answer by ensuring it aligns with the context of the {question} and meets the criteria for a \"validated answer,\" which includes correctness and relevance. If the generated answer does not significantly change the context or is incorrect, indicate this in your validation. In your thought process, clarify any assumptions made and how the context relates to the answer. Ensure that the final answer is presented in the format \\'Final Answer: Yes\\' or \\'Final Answer: No\\'. Provide your reasoning in the field and the final validated answer in the field.\\n\"\"\"\\n```', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:53:22.714\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.925 | Total tokens: 14262837 | Current cost: $0.001 | Current tokens: 5308\u001b[0m\n", "\u001b[32m2026-01-01 17:53:24.190\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.925 | Total tokens: 14262934 | Current cost: $0.000 | Current tokens: 97\u001b[0m\n", "\u001b[32m2026-01-01 17:53:26.723\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.926 | Total tokens: 14265612 | Current cost: $0.000 | Current tokens: 2678\u001b[0m\n", "\u001b[32m2026-01-01 17:53:26.723\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1049\u001b[0m - \u001b[1mEvaluate the workflow at step 17 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:00<00:45, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:01<00:41, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 
[00:02<00:38, 1.24it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:03<00:37, 1.24it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:04<00:47, 1.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:05<00:44, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:06<00:41, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:07<00:39, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:08<00:39, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:09<00:35, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:09<00:32, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:10<00:32, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:11<00:29, 1.25it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:12<00:28, 1.26it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:13<00:31, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:14<00:29, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:15<00:29, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:15<00:26, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:16<00:26, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:17<00:25, 1.17it/s]" 
] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:18<00:25, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:19<00:26, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:21<00:29, 1.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:21<00:26, 1.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:22<00:24, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:24<00:25, 1.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:25<00:23, 1.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:25<00:21, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:26<00:18, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:27<00:18, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:28<00:19, 1.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:29<00:17, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:30<00:16, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:32<00:17, 1.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:32<00:15, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:34<00:15, 1.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
74%|███████▍ | 37/50 [00:35<00:13, 1.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:36<00:13, 1.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:37<00:11, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:38<00:10, 1.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:39<00:09, 1.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:40<00:07, 1.00it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:41<00:06, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:41<00:05, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:42<00:04, 1.04it/s]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:43<00:03, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:44<00:03, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:45<00:01, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:46<00:00, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:47<00:00, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-01 17:54:14.303\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1052\u001b[0m - \u001b[1mStep 17 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.98}\u001b[0m\n", "randomly update dataset\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:54:17.325\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.932 | Total tokens: 14304310 | Current cost: $0.001 | Current tokens: 8029\u001b[0m\n", "- The workflow 
lacks a validation step to ensure that the generated answer aligns with the specific format required ('Final Answer: Yes' or 'Final Answer: No'), which could lead to incorrect outputs.\n", "- There are multiple instances of incorrect computation results leading to discrepancies between predictions and solutions, indicating potential flaws in the answer generation or validation processes.\n", "- The workflow does not account for the possibility of ambiguous or conflicting data in the input questions, which could lead to misleading answers.\n", "- The execution history shows several cases where the final answer was marked as correct despite the underlying computations being incorrect, suggesting a lack of robust error handling or validation mechanisms.\n", "- The workflow assumes that all questions can be answered with a binary response without considering the context or complexity of the underlying data, which may not always be valid.\n", "\u001b[32m2026-01-01 17:54:18.603\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.932 | Total tokens: 14305012 | Current cost: $0.000 | Current tokens: 702\u001b[0m\n", "```python\n", "steps = [\n", " {'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", " {'name': 'validate_answer2087', 'args': ['answer'], 'outputs': ['validated_answer']},\n", " {'name': 'contextualize_answer4593', 'args': ['validated_answer', 'question'], 'outputs': ['final_answer']}\n", "]\n", "```\n", "Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SEC63 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? 
Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TIMM23 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, GBF1 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, HSD17B12 is perturbed and FCGR2A expression is quantified. Does this perturbation result in a significant change in FCGR2A expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TMED2 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, PDIA6 is perturbed and SNHG12 expression is measured. Determine whether SNHG12 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb SAMM50 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, CARS is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of TELO2, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. 
In K562 cells, SRPRB is perturbed and the expression of PPCS is measured. Determine whether PPCS shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SAMM50, does the expression profile of FCGR2A indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TTI1 is perturbed and the expression of CD52 is measured. Determine whether CD52 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SLMO2 and then measure expression of PHGDH. Does this perturbation cause a significant change in PHGDH expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of COPB1, does the expression profile of CD52 indicate a significant change relative to control conditions? 
Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, SLC35B1 is perturbed and the expression of FCER1G is measured. Does this perturbation cause a significant change in FCER1G expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, STT3A is perturbed and the expression of FCER1G is measured. Determine whether FCER1G shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which AMIGO3 is perturbed and TXNIP expression is observed. Does this perturbation lead to a significant difference in TXNIP expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of GMPPB, does the expression profile of RHCE indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. 
In K562 cells, we introduce a perturbation to CARS and then measure expression of FCGR2A. Does this perturbation cause a significant change in FCGR2A expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb NEDD8 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to TIMM23 and then measure expression of SH3BGRL3. Does this perturbation cause a significant change in SH3BGRL3 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, P4HB is perturbed and RHCE expression is measured. Determine whether RHCE exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SRP68 and then measure expression of PPCS. Does this perturbation cause a significant change in PPCS expression? 
Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb PSMD4 and monitor PSMD4 expression. Decide whether this perturbation leads to a significant alteration in PSMD4 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, NEDD8 is perturbed and FCGR2A expression is quantified. Does this perturbation result in a significant change in FCGR2A expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, BHLHE40 is perturbed and SESN2 expression is measured. Determine whether SESN2 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, HSD17B12 is perturbed and CD52 expression is measured. Determine whether CD52 exhibits a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, ARHGAP22 is perturbed and the expression of SH3BGRL3 is measured. Does this perturbation cause a significant change in SH3BGRL3 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, SLMO2 is perturbed and the expression of FCGR2A is measured. Does this perturbation cause a significant change in FCGR2A expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of PPWD1 is associated with a significant change in CD52 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which ATP5B is perturbed and FCER1G expression is observed. Does this perturbation lead to a significant difference in FCER1G expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. 
In K562 cells, TMEM167A is perturbed and the expression of CD52 is measured. Does this perturbation cause a significant change in CD52 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SRP68 is perturbed and FCER1G expression is quantified. Does this perturbation result in a significant change in FCER1G expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TIMM23 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SPCS3, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SYVN1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? 
Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, ZNF326 is perturbed and ZNF326 expression is measured. Determine whether ZNF326 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb DAD1 and monitor SNHG12 expression. Decide whether this perturbation leads to a significant alteration in SNHG12 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SOCS1 is perturbed and RPS27 expression is measured. Determine whether RPS27 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to DERL2 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. 
For K562 cells, assess whether perturbation of SEL1L is associated with a significant change in TXNIP expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SCYL1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DAD1 and examine the expression of CLCA1. Does perturbing DAD1 lead to a significant change in CLCA1 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of HARS, does the expression profile of PHGDH indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SAMM50 and examine the expression of FCGR2A. Does perturbing SAMM50 lead to a significant change in FCGR2A expression? 
Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IER3IP1 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, KCTD16 is perturbed and the expression of SNHG12 is measured. Does this perturbation cause a significant change in SNHG12 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ASCC3 is associated with a significant change in SH3BGRL3 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of ARHGAP22, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. 
For experiments carried out in K562 cells, we perturb TTI1 and examine the expression of RHCE. Does perturbing TTI1 lead to a significant change in RHCE expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': '\"\"\"\\nUse the context from `{question}` to determine the best answer. If the answer is uncertain or conflicting, clearly indicate this. Ensure that the answer is validated against reliable sources for accuracy before proceeding. Provide your final answer in a clear format, without extra commentary or reasoning.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. 
Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:54:21.708\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.933 | Total tokens: 14310187 | Current cost: $0.001 | Current tokens: 5175\u001b[0m\n", "\u001b[32m2026-01-01 17:54:22.588\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.933 | Total tokens: 14310290 | Current cost: $0.000 | Current tokens: 103\u001b[0m\n", "\u001b[32m2026-01-01 17:54:23.646\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.933 | Total tokens: 14311848 | Current cost: $0.000 | Current tokens: 1558\u001b[0m\n", "{'name': 'validate_answer2087', 'description': 'Task to validate_answer2087. Takes answer as input. Produces validated_answer as output.', 'inputs': [{'name': 'answer', 'type': 'str', 'description': 'Input parameter answer for validate_answer2087', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_answer2087', 'required': True}], 'prompt': '\"\"\"\\nThink step by step to answer the question based on the context provided in {question}. First, validate the context to ensure accuracy and relevance. If the answer is uncertain or ambiguous, clearly state that uncertainty. After validation, generate a straightforward answer that directly addresses {question}, considering any relevant nuances. 
Format your output in XML, using to explain your reasoning and for the final response. Ensure that the answer is concise and reflects any significant changes accurately, avoiding oversimplification.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:54:26.480\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.934 | Total tokens: 14317053 | Current cost: $0.001 | Current tokens: 5205\u001b[0m\n", "\u001b[32m2026-01-01 17:54:28.104\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.934 | Total tokens: 14317157 | Current cost: $0.000 | Current tokens: 104\u001b[0m\n", "\u001b[32m2026-01-01 17:54:30.332\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.934 | Total tokens: 14319695 | Current cost: $0.000 | Current tokens: 2538\u001b[0m\n", "{'name': 'contextualize_answer4593', 'description': 'Task to contextualize_answer4593. Takes validated_answer, question as input. 
Produces final_answer as output.', 'inputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Input parameter validated_answer for contextualize_answer4593', 'required': False}, {'name': 'question', 'type': 'str', 'description': 'Input parameter question for contextualize_answer4593', 'required': False}], 'outputs': [{'name': 'final_answer', 'type': 'str', 'description': 'Output parameter final_answer from contextualize_answer4593', 'required': True}], 'prompt': '```xml\\n\"\"\"\\nTo answer the question accurately, first generate an answer based on the provided {question}. Validate the generated answer by ensuring it aligns with the context of the {question} and meets the criteria for a \"validated answer,\" which includes correctness and relevance. If the generated answer does not significantly change the context or is incorrect, indicate this in your validation. In your thought process, clarify any assumptions made and how the context relates to the answer. Ensure that the final answer is presented in the format \\'Final Answer: Yes\\' or \\'Final Answer: No\\'. Provide your reasoning in the field and the final validated answer in the field.\\n\"\"\"\\n```', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:54:33.635\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.935 | Total tokens: 14324901 | Current cost: $0.001 | Current tokens: 5206\u001b[0m\n", "\u001b[32m2026-01-01 17:54:34.597\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.935 | Total tokens: 14325004 | Current cost: $0.000 | Current tokens: 103\u001b[0m\n", "\u001b[32m2026-01-01 17:54:36.971\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.936 | Total tokens: 14327805 | Current cost: $0.000 | Current tokens: 2801\u001b[0m\n", "\u001b[32m2026-01-01 17:54:36.972\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1049\u001b[0m - \u001b[1mEvaluate the workflow at step 18 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:00<00:38, 1.26it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:01<00:41, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 
[00:02<00:39, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:03<00:39, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:04<00:40, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:05<00:37, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:05<00:36, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:07<00:39, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:07<00:36, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:08<00:34, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:09<00:32, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:10<00:30, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:11<00:29, 1.24it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:11<00:30, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:12<00:27, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:13<00:30, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:15<00:34, 1.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:16<00:33, 1.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:16<00:29, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:17<00:27, 1.08it/s]" 
] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:18<00:27, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:20<00:29, 1.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:20<00:27, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:22<00:26, 1.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:22<00:23, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:24<00:26, 1.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:25<00:24, 1.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:26<00:22, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:26<00:20, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:27<00:18, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:28<00:16, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:29<00:15, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:30<00:14, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:31<00:13, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:31<00:12, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:33<00:12, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
74%|███████▍ | 37/50 [00:34<00:12, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:35<00:11, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:35<00:10, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:36<00:09, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:37<00:07, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:38<00:06, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:39<00:05, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:40<00:05, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:41<00:04, 1.09it/s]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:42<00:03, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:42<00:02, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:43<00:01, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:45<00:01, 1.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:45<00:00, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-01 17:55:22.874\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1052\u001b[0m - \u001b[1mStep 18 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.92}\u001b[0m\n", "randomly update dataset\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:55:26.747\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.942 | Total tokens: 14366511 | Current cost: $0.001 | Current tokens: 8019\u001b[0m\n", "- The workflow 
lacks a validation step after generating the answer, which could lead to incorrect outputs being used in subsequent steps.\n", "- The control flow does not account for potential errors or inconsistencies in the answer validation process, leading to unhandled cases where the answer may not be valid.\n", "- There is an assumption that all generated answers will be valid without any checks for logical consistency or correctness before moving to contextualization.\n", "- The execution history shows multiple instances where the predictions and solutions do not align, indicating a failure in the validation process that should have caught these discrepancies.\n", "- The final answer format is strictly defined, yet the workflow does not ensure that all intermediate outputs conform to this format, which could lead to ambiguity in the final presentation of answers.\n", "\u001b[32m2026-01-01 17:55:27.885\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.942 | Total tokens: 14367200 | Current cost: $0.000 | Current tokens: 689\u001b[0m\n", "```python\n", "steps = [\n", "{'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", "{'name': 'validate_answer2087', 'args': ['answer'], 'outputs': ['validated_answer']},\n", "{'name': 'contextualize_answer4593', 'args': ['validated_answer', 'question'], 'outputs': ['final_answer']}\n", "]\n", "```\n", "Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb PSMD4 and monitor PSMD4 expression. Decide whether this perturbation leads to a significant alteration in PSMD4 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. 
For experiments performed in K562 cells, ZNF326 is perturbed and ZNF326 expression is measured. Determine whether ZNF326 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ZNF326 is associated with a significant change in RGS16 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of SEL1L is associated with a significant change in TXNIP expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, HSD17B12 is perturbed and FCGR2A expression is quantified. Does this perturbation result in a significant change in FCGR2A expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DNAJC19 is perturbed and PHGDH expression is quantified. Does this perturbation result in a significant change in PHGDH expression compared with control cells? 
Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, CHERP is perturbed and NPL expression is quantified. Does this perturbation result in a significant change in NPL expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which AMIGO3 is perturbed and TXNIP expression is observed. Does this perturbation lead to a significant difference in TXNIP expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SEC63 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, IER3IP1 is perturbed and CLCA1 expression is measured. Determine whether CLCA1 exhibits a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DDIT3 is perturbed and RPS27 expression is quantified. Does this perturbation result in a significant change in RPS27 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DAD1 and examine the expression of CLCA1. Does perturbing DAD1 lead to a significant change in CLCA1 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, KCTD16 is perturbed and the expression of SNHG12 is measured. Does this perturbation cause a significant change in SNHG12 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to DERL2 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. 
For experiments performed in K562 cells, PDIA6 is perturbed and SNHG12 expression is measured. Determine whether SNHG12 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, SLC35B1 is perturbed and the expression of FCER1G is measured. Does this perturbation cause a significant change in FCER1G expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, UFM1 is perturbed and the expression of CLCA1 is measured. Does this perturbation cause a significant change in CLCA1 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, P4HB is perturbed and RHCE expression is measured. Determine whether RHCE exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, P4HB is perturbed and RHCE expression is quantified. Does this perturbation result in a significant change in RHCE expression compared with control cells? 
Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, IARS2 is perturbed and the expression of PHGDH is measured. Determine whether PHGDH shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PPWD1 is perturbed and CLCA1 expression is observed. Does this perturbation lead to a significant difference in CLCA1 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb IER3IP1 and monitor FCER1G expression. Decide whether this perturbation leads to a significant alteration in FCER1G expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SPCS2 is perturbed and the expression of SH3BGRL3 is measured. Determine whether SH3BGRL3 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: No\n", "Solutions: Yes\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in regulatory genomics. 
Consider data from K562 cells in which ATP5B is perturbed and FCER1G expression is observed. Does this perturbation lead to a significant difference in FCER1G expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb TTI1 and examine the expression of RHCE. Does perturbing TTI1 lead to a significant change in RHCE expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb SAMM50 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, EIF2B2 is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SCYL1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? 
Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to MRGBP and then measure expression of RPS27. Does this perturbation cause a significant change in RPS27 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of PPWD1 is associated with a significant change in CD52 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TIMM23 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of COPZ1, does the expression profile of SH3BGRL3 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. 
For K562 cells subjected to perturbation of SAMM50, does the expression profile of FCGR2A indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IDH3A is perturbed and SNHG12 expression is observed. Does this perturbation lead to a significant difference in SNHG12 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb FARSB and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of COPB1, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PDIA6 is perturbed and YTHDF2 expression is observed. Does this perturbation lead to a significant difference in YTHDF2 expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SPCS3, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of MARS, does the expression profile of FAM129A indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SAMM50 and examine the expression of FCGR2A. Does perturbing SAMM50 lead to a significant change in FCGR2A expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, ARHGAP22 is perturbed and the expression of SH3BGRL3 is measured. Does this perturbation cause a significant change in SH3BGRL3 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. 
In K562 cells, TMEM167A is perturbed and the expression of CD52 is measured. Does this perturbation cause a significant change in CD52 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TMED2 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SOCS1 is perturbed and RPS27 expression is measured. Determine whether RPS27 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DDIT3 and examine the expression of RGS16. Does perturbing DDIT3 lead to a significant change in RGS16 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to UFL1 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? 
Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ASCC3 is associated with a significant change in SH3BGRL3 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TTI1 is perturbed and the expression of CD52 is measured. Determine whether CD52 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb DAD1 and monitor SNHG12 expression. Decide whether this perturbation leads to a significant alteration in SNHG12 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, STT3A is perturbed and the expression of FCER1G is measured. Determine whether FCER1G shows a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': '\"\"\"\\nUse the context from `{question}` to determine the best answer. If the answer is uncertain or conflicting, clearly indicate this. Ensure that the answer is validated against reliable sources for accuracy before proceeding. Provide your final answer in a clear format, without extra commentary or reasoning.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. 
Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:55:30.402\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.943 | Total tokens: 14372378 | Current cost: $0.001 | Current tokens: 5178\u001b[0m\n", "\u001b[32m2026-01-01 17:55:31.324\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.943 | Total tokens: 14372482 | Current cost: $0.000 | Current tokens: 104\u001b[0m\n", "\u001b[32m2026-01-01 17:55:32.735\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.943 | Total tokens: 14374074 | Current cost: $0.000 | Current tokens: 1592\u001b[0m\n", "{'name': 'validate_answer2087', 'description': 'Task to validate_answer2087. Takes answer as input. Produces validated_answer as output.', 'inputs': [{'name': 'answer', 'type': 'str', 'description': 'Input parameter answer for validate_answer2087', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_answer2087', 'required': True}], 'prompt': '\"\"\"\\nThink step by step to answer the question based on the context provided in {question}. First, validate the context to ensure accuracy and relevance. If the answer is uncertain or ambiguous, clearly state that uncertainty. After validation, generate a straightforward answer that directly addresses {question}, considering any relevant nuances. 
Format your output in XML, using to explain your reasoning and for the final response. Ensure that the answer is concise and reflects any significant changes accurately, avoiding oversimplification.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:55:35.336\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.944 | Total tokens: 14379306 | Current cost: $0.001 | Current tokens: 5232\u001b[0m\n", "\u001b[32m2026-01-01 17:55:36.919\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.944 | Total tokens: 14379410 | Current cost: $0.000 | Current tokens: 104\u001b[0m\n", "\u001b[32m2026-01-01 17:55:38.760\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.945 | Total tokens: 14382064 | Current cost: $0.000 | Current tokens: 2654\u001b[0m\n", "{'name': 'contextualize_answer4593', 'description': 'Task to contextualize_answer4593. Takes validated_answer, question as input. 
Produces final_answer as output.', 'inputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Input parameter validated_answer for contextualize_answer4593', 'required': False}, {'name': 'question', 'type': 'str', 'description': 'Input parameter question for contextualize_answer4593', 'required': False}], 'outputs': [{'name': 'final_answer', 'type': 'str', 'description': 'Output parameter final_answer from contextualize_answer4593', 'required': True}], 'prompt': '```xml\\n\"\"\"\\nTo answer the question accurately, first generate an answer based on the provided {question}. Validate the generated answer by ensuring it aligns with the context of the {question} and meets the criteria for a \"validated answer,\" which includes correctness and relevance. If the generated answer does not significantly change the context or is incorrect, indicate this in your validation. In your thought process, clarify any assumptions made and how the context relates to the answer. Ensure that the final answer is presented in the format \\'Final Answer: Yes\\' or \\'Final Answer: No\\'. Provide your reasoning in the field and the final validated answer in the field.\\n\"\"\"\\n```', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:55:41.281\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.945 | Total tokens: 14387310 | Current cost: $0.001 | Current tokens: 5246\u001b[0m\n", "\u001b[32m2026-01-01 17:55:42.586\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.945 | Total tokens: 14387404 | Current cost: $0.000 | Current tokens: 94\u001b[0m\n", "\u001b[32m2026-01-01 17:55:44.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.946 | Total tokens: 14390336 | Current cost: $0.000 | Current tokens: 2932\u001b[0m\n", "\u001b[32m2026-01-01 17:55:44.998\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1049\u001b[0m - \u001b[1mEvaluate the workflow at step 19 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:01<01:03, 1.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:02<00:51, 1.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 
[00:03<00:46, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:03<00:42, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:04<00:37, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:05<00:37, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:06<00:42, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:07<00:42, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:08<00:37, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:09<00:33, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:10<00:39, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:11<00:34, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:11<00:31, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:13<00:33, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:13<00:30, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:14<00:32, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:16<00:33, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:17<00:31, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:17<00:27, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:18<00:28, 1.04it/s]" 
] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:19<00:25, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:20<00:25, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:21<00:25, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:22<00:24, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:23<00:22, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:24<00:22, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:25<00:21, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:26<00:22, 1.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:27<00:19, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:28<00:18, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:28<00:17, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:29<00:15, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:30<00:15, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:31<00:14, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:32<00:13, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:33<00:13, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
74%|███████▍ | 37/50 [00:34<00:11, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:35<00:11, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:36<00:10, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:37<00:08, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:37<00:07, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:38<00:07, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:40<00:07, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:40<00:05, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:41<00:04, 1.18it/s]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:42<00:03, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:44<00:03, 1.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:46<00:02, 1.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:49<00:01, 1.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:50<00:00, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-01 17:56:35.531\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1052\u001b[0m - \u001b[1mStep 19 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.92}\u001b[0m\n", "randomly update dataset\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:56:38.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.952 | Total tokens: 14429035 | Current cost: $0.001 | Current tokens: 8012\u001b[0m\n", "- The workflow 
lacks a validation step after generating the answer, which could lead to incorrect outputs being processed further without verification.\n", "- There is no explicit handling of cases where the answer might be ambiguous or require additional context, leading to potential misinterpretation of the results.\n", "- The assumption that all questions can be answered with a simple 'Yes' or 'No' may not hold true for all scenarios, risking oversimplification of complex biological data.\n", "- The workflow does not include a mechanism for addressing or logging errors encountered during execution, which could help in identifying patterns of failure.\n", "- The control flow does not account for potential contradictions between the predictions and solutions, which could lead to confusion in the final output.\n", "\u001b[32m2026-01-01 17:56:41.756\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.952 | Total tokens: 14429717 | Current cost: $0.000 | Current tokens: 682\u001b[0m\n", "```python\n", "steps = [\n", "{'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", "{'name': 'validate_answer2087', 'args': ['answer'], 'outputs': ['validated_answer']},\n", "{'name': 'contextualize_answer4593', 'args': ['validated_answer', 'question'], 'outputs': ['final_answer']}\n", "]\n", "```\n", "Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PDIA6 is perturbed and YTHDF2 expression is observed. Does this perturbation lead to a significant difference in YTHDF2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. 
Consider data from K562 cells in which ATP5B is perturbed and FCER1G expression is observed. Does this perturbation lead to a significant difference in FCER1G expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb HYOU1 and examine the expression of SNHG12. Does perturbing HYOU1 lead to a significant change in SNHG12 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, NEDD8 is perturbed and FCGR2A expression is quantified. Does this perturbation result in a significant change in FCGR2A expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to CARS and then measure expression of FCGR2A. Does this perturbation cause a significant change in FCGR2A expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SOCS1 is perturbed and RPS27 expression is measured. Determine whether RPS27 exhibits a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, HSD17B12 is perturbed and CD52 expression is measured. Determine whether CD52 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to MRGBP and then measure expression of RPS27. Does this perturbation cause a significant change in RPS27 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb SAMM50 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SEC63 is perturbed and CD52 expression is measured. Determine whether CD52 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. 
For K562 cells, assess whether perturbation of SEL1L is associated with a significant change in TXNIP expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SRPRB is perturbed and the expression of PPCS is measured. Determine whether PPCS shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of HARS, does the expression profile of PHGDH indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb PSMD4 and monitor PSMD4 expression. Decide whether this perturbation leads to a significant alteration in PSMD4 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SYVN1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? 
Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SRP68 is perturbed and FCER1G expression is quantified. Does this perturbation result in a significant change in FCER1G expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of ARHGAP22, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, STT3A is perturbed and the expression of FCER1G is measured. Determine whether FCER1G shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TIMM23 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SEC63 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb TTI1 and examine the expression of RHCE. Does perturbing TTI1 lead to a significant change in RHCE expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, ARHGAP22 is perturbed and the expression of SH3BGRL3 is measured. Does this perturbation cause a significant change in SH3BGRL3 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, PDIA6 is perturbed and SNHG12 expression is measured. Determine whether SNHG12 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. 
In K562 cells, TTI1 is perturbed and the expression of CD52 is measured. Determine whether CD52 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SPCS2 is perturbed and the expression of SH3BGRL3 is measured. Determine whether SH3BGRL3 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TTI1 is perturbed and the expression of RHCE is measured. Determine whether RHCE shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which SRPRB is perturbed and PPCS expression is observed. Does this perturbation lead to a significant difference in PPCS expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SRP68 and then measure expression of PPCS. Does this perturbation cause a significant change in PPCS expression? 
Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb MRGBP and examine the expression of RPS27. Does perturbing MRGBP lead to a significant change in RPS27 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, CARS is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of PPWD1 is associated with a significant change in CD52 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ASCC3 is associated with a significant change in SH3BGRL3 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. 
For K562 cells subjected to perturbation of GMPPB, does the expression profile of RHCE indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb DAD1 and monitor SNHG12 expression. Decide whether this perturbation leads to a significant alteration in SNHG12 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb NEDD8 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TIMM23 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, BHLHE40 is perturbed and SESN2 expression is measured. Determine whether SESN2 exhibits a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DDIT3 is perturbed and RPS27 expression is quantified. Does this perturbation result in a significant change in RPS27 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IER3IP1 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to UFL1 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PPWD1 is perturbed and CLCA1 expression is observed. Does this perturbation lead to a significant difference in CLCA1 expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DAD1 and examine the expression of CLCA1. Does perturbing DAD1 lead to a significant change in CLCA1 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SAMM50, does the expression profile of FCGR2A indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, ZNF326 is perturbed and ZNF326 expression is measured. Determine whether ZNF326 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, TMEM167A is perturbed and the expression of CD52 is measured. Does this perturbation cause a significant change in CD52 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. 
In K562 cells, SYVN1 is perturbed and YTHDF2 expression is quantified. Does this perturbation result in a significant change in YTHDF2 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, EIF2B2 is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, HSD17B12 is perturbed and FCGR2A expression is quantified. Does this perturbation result in a significant change in FCGR2A expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SLMO2 and then measure expression of PHGDH. Does this perturbation cause a significant change in PHGDH expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which BHLHE40 is perturbed and SESN2 expression is observed. Does this perturbation lead to a significant difference in SESN2 expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': '\"\"\"\\nUse the context from `{question}` to determine the best answer. If the answer is uncertain or conflicting, clearly indicate this. Ensure that the answer is validated against reliable sources for accuracy before proceeding. Provide your final answer in a clear format, without extra commentary or reasoning.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. 
Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:56:45.344\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.953 | Total tokens: 14434904 | Current cost: $0.001 | Current tokens: 5187\u001b[0m\n", "\u001b[32m2026-01-01 17:56:46.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.953 | Total tokens: 14435007 | Current cost: $0.000 | Current tokens: 103\u001b[0m\n", "\u001b[32m2026-01-01 17:56:48.929\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.953 | Total tokens: 14436670 | Current cost: $0.000 | Current tokens: 1663\u001b[0m\n", "{'name': 'validate_answer2087', 'description': 'Task to validate_answer2087. Takes answer as input. Produces validated_answer as output.', 'inputs': [{'name': 'answer', 'type': 'str', 'description': 'Input parameter answer for validate_answer2087', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_answer2087', 'required': True}], 'prompt': '\"\"\"\\nThink step by step to answer the question based on the context provided in {question}. First, validate the context to ensure accuracy and relevance. If the answer is uncertain or ambiguous, clearly state that uncertainty. After validation, generate a straightforward answer that directly addresses {question}, considering any relevant nuances. 
Format your output in XML, using to explain your reasoning and for the final response. Ensure that the answer is concise and reflects any significant changes accurately, avoiding oversimplification.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:56:51.911\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.954 | Total tokens: 14441920 | Current cost: $0.001 | Current tokens: 5250\u001b[0m\n", "\u001b[32m2026-01-01 17:56:52.954\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.954 | Total tokens: 14442027 | Current cost: $0.000 | Current tokens: 107\u001b[0m\n", "\u001b[32m2026-01-01 17:56:55.796\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.955 | Total tokens: 14444821 | Current cost: $0.000 | Current tokens: 2794\u001b[0m\n", "{'name': 'contextualize_answer4593', 'description': 'Task to contextualize_answer4593. Takes validated_answer, question as input. 
Produces final_answer as output.', 'inputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Input parameter validated_answer for contextualize_answer4593', 'required': False}, {'name': 'question', 'type': 'str', 'description': 'Input parameter question for contextualize_answer4593', 'required': False}], 'outputs': [{'name': 'final_answer', 'type': 'str', 'description': 'Output parameter final_answer from contextualize_answer4593', 'required': True}], 'prompt': '```xml\\n\"\"\"\\nTo answer the question accurately, first generate an answer based on the provided {question}. Validate the generated answer by ensuring it aligns with the context of the {question} and meets the criteria for a \"validated answer,\" which includes correctness and relevance. If the generated answer does not significantly change the context or is incorrect, indicate this in your validation. In your thought process, clarify any assumptions made and how the context relates to the answer. Ensure that the final answer is presented in the format \\'Final Answer: Yes\\' or \\'Final Answer: No\\'. Provide your reasoning in the field and the final validated answer in the field.\\n\"\"\"\\n```', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:56:58.147\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.956 | Total tokens: 14450057 | Current cost: $0.001 | Current tokens: 5236\u001b[0m\n", "\u001b[32m2026-01-01 17:56:59.113\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.956 | Total tokens: 14450153 | Current cost: $0.000 | Current tokens: 96\u001b[0m\n", "\u001b[32m2026-01-01 17:57:02.000\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.956 | Total tokens: 14453212 | Current cost: $0.001 | Current tokens: 3059\u001b[0m\n", "\u001b[32m2026-01-01 17:57:02.000\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1049\u001b[0m - \u001b[1mEvaluate the workflow at step 20 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:00<00:42, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:01<00:42, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 
[00:02<00:38, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:03<00:45, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:04<00:45, 1.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:05<00:41, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:06<00:41, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:07<00:40, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:08<00:39, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:09<00:37, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:10<00:35, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:11<00:33, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:12<00:33, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:12<00:32, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:13<00:30, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:14<00:31, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:16<00:35, 1.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:17<00:32, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:17<00:29, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:18<00:26, 1.11it/s]" 
] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:19<00:24, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:20<00:26, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:21<00:25, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:22<00:24, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:23<00:22, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:24<00:20, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:24<00:19, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:25<00:19, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:26<00:19, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:27<00:18, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:28<00:16, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:29<00:15, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:30<00:14, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:31<00:15, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:32<00:14, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:33<00:12, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
74%|███████▍ | 37/50 [00:33<00:10, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:34<00:10, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:35<00:09, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:36<00:09, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:37<00:08, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:38<00:07, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:39<00:07, 1.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:40<00:05, 1.00it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:41<00:04, 1.04it/s]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:42<00:03, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:43<00:02, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:44<00:01, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:44<00:00, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:45<00:00, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-01 17:57:47.794\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1052\u001b[0m - \u001b[1mStep 20 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.92}\u001b[0m\n", "randomly update dataset\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:57:51.436\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.962 | Total tokens: 14491942 | Current cost: $0.001 | Current tokens: 8043\u001b[0m\n", "- The workflow 
lacks a validation step for the initial question input, which could lead to incorrect assumptions being made about the context or requirements of the question.\n", "- There is no explicit handling of cases where the answer might be ambiguous or where the data may not support a clear 'Yes' or 'No' response, leading to potential misinterpretation of results.\n", "- The control flow does not account for scenarios where the validation of the answer fails, as there is no mechanism to revisit or adjust the answer based on validation feedback.\n", "- The workflow assumes that all questions are straightforward and can be answered with a binary response, which may not hold true for all perturbation experiments, potentially oversimplifying complex biological data.\n", "- The execution history shows multiple instances where the predicted answers were incorrect, indicating a potential flaw in the answer generation process that is not addressed in the workflow.\n", "\u001b[32m2026-01-01 17:57:52.764\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.963 | Total tokens: 14492655 | Current cost: $0.000 | Current tokens: 713\u001b[0m\n", "```python\n", "steps = [\n", "{'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", "{'name': 'validate_answer2087', 'args': ['answer'], 'outputs': ['validated_answer']},\n", "{'name': 'contextualize_answer4593', 'args': ['validated_answer', 'question'], 'outputs': ['final_answer']}\n", "]\n", "```\n", "Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TMED2 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb DAD1 and monitor SNHG12 expression. Decide whether this perturbation leads to a significant alteration in SNHG12 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of COPB1, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, SLMO2 is perturbed and the expression of FCGR2A is measured. Does this perturbation cause a significant change in FCGR2A expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, CHERP is perturbed and NPL expression is quantified. Does this perturbation result in a significant change in NPL expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. 
For experiments performed in K562 cells, HSD17B12 is perturbed and CD52 expression is measured. Determine whether CD52 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SRPRB is perturbed and the expression of PPCS is measured. Determine whether PPCS shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to CARS and then measure expression of FCGR2A. Does this perturbation cause a significant change in FCGR2A expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, ARHGAP22 is perturbed and the expression of SH3BGRL3 is measured. Does this perturbation cause a significant change in SH3BGRL3 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IER3IP1 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, NEDD8 is perturbed and FCGR2A expression is quantified. Does this perturbation result in a significant change in FCGR2A expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DAD1 and examine the expression of CLCA1. Does perturbing DAD1 lead to a significant change in CLCA1 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SLC35B1 and examine the expression of RHCE. Does perturbing SLC35B1 lead to a significant change in RHCE expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, EIF2B2 is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. 
For K562 cells subjected to perturbation of HARS, does the expression profile of PHGDH indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of COPZ1, does the expression profile of SH3BGRL3 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TIMM23 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ASCC3 is associated with a significant change in SH3BGRL3 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IDH3A is perturbed and SNHG12 expression is observed. Does this perturbation lead to a significant difference in SNHG12 expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb NEDD8 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SPCS3, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TTI1 is perturbed and the expression of RHCE is measured. Determine whether RHCE shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of TELO2, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. 
For K562 cells subjected to perturbation of SAMM50, does the expression profile of FCGR2A indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, CARS is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SOCS1 is perturbed and RPS27 expression is measured. Determine whether RPS27 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SPCS2 is perturbed and the expression of SH3BGRL3 is measured. Determine whether SH3BGRL3 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SCYL1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? 
Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of IER3IP1, does the expression profile of FCER1G indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SEC63 is perturbed and CD52 expression is measured. Determine whether CD52 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SRP72 and examine the expression of RPS27. Does perturbing SRP72 lead to a significant change in RPS27 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb TTI1 and examine the expression of RHCE. Does perturbing TTI1 lead to a significant change in RHCE expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. 
In K562 cells, we introduce a perturbation to SYVN1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, IER3IP1 is perturbed and CLCA1 expression is measured. Determine whether CLCA1 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, UFM1 is perturbed and the expression of CLCA1 is measured. Does this perturbation cause a significant change in CLCA1 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DNAJC19 is perturbed and PHGDH expression is quantified. Does this perturbation result in a significant change in PHGDH expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SLMO2 and then measure expression of PHGDH. Does this perturbation cause a significant change in PHGDH expression? 
Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to DERL2 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ZNF326 is associated with a significant change in RGS16 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DDIT3 and examine the expression of RGS16. Does perturbing DDIT3 lead to a significant change in RGS16 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which BHLHE40 is perturbed and SESN2 expression is observed. Does this perturbation lead to a significant difference in SESN2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. 
Consider data from K562 cells in which TIMM23 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to UFL1 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb MRGBP and examine the expression of RPS27. Does perturbing MRGBP lead to a significant change in RPS27 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb FARSB and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of GMPPB, does the expression profile of RHCE indicate a significant change relative to control conditions? 
Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, IARS2 is perturbed and the expression of PHGDH is measured. Determine whether PHGDH shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which SRPRB is perturbed and PPCS expression is observed. Does this perturbation lead to a significant difference in PPCS expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to TIMM23 and then measure expression of SH3BGRL3. Does this perturbation cause a significant change in SH3BGRL3 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DDIT3 is perturbed and RPS27 expression is quantified. Does this perturbation result in a significant change in RPS27 expression compared with control cells? 
Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': '\"\"\"\\nUse the context from `{question}` to determine the best answer. If the answer is uncertain or conflicting, clearly indicate this. Ensure that the answer is validated against reliable sources for accuracy before proceeding. Provide your final answer in a clear format, without extra commentary or reasoning.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. 
Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:57:55.175\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.963 | Total tokens: 14497818 | Current cost: $0.001 | Current tokens: 5163\u001b[0m\n", "\u001b[32m2026-01-01 17:57:56.827\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.963 | Total tokens: 14497940 | Current cost: $0.000 | Current tokens: 122\u001b[0m\n", "\u001b[32m2026-01-01 17:57:58.574\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.964 | Total tokens: 14499657 | Current cost: $0.000 | Current tokens: 1717\u001b[0m\n", "{'name': 'validate_answer2087', 'description': 'Task to validate_answer2087. Takes answer as input. Produces validated_answer as output.', 'inputs': [{'name': 'answer', 'type': 'str', 'description': 'Input parameter answer for validate_answer2087', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_answer2087', 'required': True}], 'prompt': '\"\"\"\\nThink step by step to answer the question based on the context provided in {question}. First, validate the context to ensure accuracy and relevance. If the answer is uncertain or ambiguous, clearly state that uncertainty. After validation, generate a straightforward answer that directly addresses {question}, considering any relevant nuances. 
Format your output in XML, using to explain your reasoning and for the final response. Ensure that the answer is concise and reflects any significant changes accurately, avoiding oversimplification.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:58:00.890\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.965 | Total tokens: 14504896 | Current cost: $0.001 | Current tokens: 5239\u001b[0m\n", "\u001b[32m2026-01-01 17:58:02.937\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.965 | Total tokens: 14505001 | Current cost: $0.000 | Current tokens: 105\u001b[0m\n", "\u001b[32m2026-01-01 17:58:05.693\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.965 | Total tokens: 14507901 | Current cost: $0.000 | Current tokens: 2900\u001b[0m\n", "{'name': 'contextualize_answer4593', 'description': 'Task to contextualize_answer4593. Takes validated_answer, question as input. 
Produces final_answer as output.', 'inputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Input parameter validated_answer for contextualize_answer4593', 'required': False}, {'name': 'question', 'type': 'str', 'description': 'Input parameter question for contextualize_answer4593', 'required': False}], 'outputs': [{'name': 'final_answer', 'type': 'str', 'description': 'Output parameter final_answer from contextualize_answer4593', 'required': True}], 'prompt': '```xml\\n\"\"\"\\nTo answer the question accurately, first generate an answer based on the provided {question}. Validate the generated answer by ensuring it aligns with the context of the {question} and meets the criteria for a \"validated answer,\" which includes correctness and relevance. If the generated answer does not significantly change the context or is incorrect, indicate this in your validation. In your thought process, clarify any assumptions made and how the context relates to the answer. Ensure that the final answer is presented in the format \\'Final Answer: Yes\\' or \\'Final Answer: No\\'. Provide your reasoning in the field and the final validated answer in the field.\\n\"\"\"\\n```', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:58:08.285\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.966 | Total tokens: 14513144 | Current cost: $0.001 | Current tokens: 5243\u001b[0m\n", "\u001b[32m2026-01-01 17:58:09.687\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.966 | Total tokens: 14513240 | Current cost: $0.000 | Current tokens: 96\u001b[0m\n", "\u001b[32m2026-01-01 17:58:11.989\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.967 | Total tokens: 14516441 | Current cost: $0.001 | Current tokens: 3201\u001b[0m\n", "\u001b[32m2026-01-01 17:58:11.990\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1049\u001b[0m - \u001b[1mEvaluate the workflow at step 21 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:00<00:41, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:01<00:38, 1.26it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 
[00:02<00:45, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:03<00:45, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:04<00:41, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:05<00:44, 1.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:06<00:41, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:07<00:39, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:08<00:37, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:09<00:35, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:10<00:33, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:10<00:31, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:11<00:30, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:12<00:29, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:13<00:27, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:13<00:26, 1.26it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:14<00:26, 1.26it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:15<00:26, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:16<00:25, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:17<00:24, 1.21it/s]" 
] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:18<00:24, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:19<00:28, 1.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:20<00:26, 1.00it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:21<00:24, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:22<00:24, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:23<00:22, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:24<00:22, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:25<00:19, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:26<00:20, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:27<00:18, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:27<00:17, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:28<00:14, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:29<00:16, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:31<00:16, 1.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:32<00:18, 1.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:33<00:15, 1.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
74%|███████▍ | 37/50 [00:34<00:14, 1.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:35<00:11, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:36<00:10, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:36<00:09, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:37<00:07, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:38<00:06, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:40<00:07, 1.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:40<00:05, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:41<00:04, 1.10it/s]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:42<00:03, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:43<00:02, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:44<00:01, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:44<00:00, 1.24it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:45<00:00, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-01 17:58:57.642\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1052\u001b[0m - \u001b[1mStep 21 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.92}\u001b[0m\n", "randomly update dataset\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:58:59.757\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.973 | Total tokens: 14555129 | Current cost: $0.001 | Current tokens: 7990\u001b[0m\n", "- The workflow 
lacks a validation step after generating the answer, which could lead to incorrect outputs being accepted without verification.\n", "- The control flow does not account for potential errors in the answer generation or validation steps, leading to unhandled cases where the answer may be incorrect.\n", "- There is an inconsistency in the execution history where multiple instances show incorrect predictions and solutions, indicating a failure to adequately address or learn from errors in prior steps.\n", "- The prompts and intermediate steps do not specify how to handle cases where the expression change is ambiguous or not statistically significant, leading to potential misinterpretation of results.\n", "\u001b[32m2026-01-01 17:59:01.511\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.973 | Total tokens: 14555789 | Current cost: $0.000 | Current tokens: 660\u001b[0m\n", "```python\n", "steps = [\n", "{'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", "{'name': 'validate_answer2087', 'args': ['answer'], 'outputs': ['validated_answer']},\n", "{'name': 'contextualize_answer4593', 'args': ['validated_answer', 'question'], 'outputs': ['final_answer']}\n", "]\n", "```\n", "Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of HARS, does the expression profile of PHGDH indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb TTI1 and examine the expression of RHCE. Does perturbing TTI1 lead to a significant change in RHCE expression? 
Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of COPZ1, does the expression profile of SH3BGRL3 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SYVN1 is perturbed and YTHDF2 expression is quantified. Does this perturbation result in a significant change in YTHDF2 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, EIF2B2 is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DDIT3 is perturbed and RPS27 expression is quantified. Does this perturbation result in a significant change in RPS27 expression compared with control cells? 
Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which ATP5B is perturbed and FCER1G expression is observed. Does this perturbation lead to a significant difference in FCER1G expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which BHLHE40 is perturbed and SESN2 expression is observed. Does this perturbation lead to a significant difference in SESN2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SRP68 is perturbed and FCER1G expression is quantified. Does this perturbation result in a significant change in FCER1G expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SPCS3, does the expression profile of CD52 indicate a significant change relative to control conditions? 
Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, SLC35B1 is perturbed and the expression of FCER1G is measured. Does this perturbation cause a significant change in FCER1G expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SPCS2 is perturbed and the expression of SH3BGRL3 is measured. Determine whether SH3BGRL3 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SRP72 and examine the expression of RPS27. Does perturbing SRP72 lead to a significant change in RPS27 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which AMIGO3 is perturbed and TXNIP expression is observed. Does this perturbation lead to a significant difference in TXNIP expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, PDIA6 is perturbed and SNHG12 expression is measured. Determine whether SNHG12 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SAMM50 and examine the expression of FCGR2A. Does perturbing SAMM50 lead to a significant change in FCGR2A expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SLC35B1 and examine the expression of RHCE. Does perturbing SLC35B1 lead to a significant change in RHCE expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TMED2 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. 
In K562 cells, we perturb FARSB and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb HYOU1 and examine the expression of SNHG12. Does perturbing HYOU1 lead to a significant change in SNHG12 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TIMM23 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TTI1 is perturbed and the expression of RHCE is measured. Determine whether RHCE shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of IER3IP1, does the expression profile of FCER1G indicate a significant change relative to control conditions? 
Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, HSD17B12 is perturbed and CD52 expression is measured. Determine whether CD52 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which SRPRB is perturbed and PPCS expression is observed. Does this perturbation lead to a significant difference in PPCS expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ZNF326 is associated with a significant change in RGS16 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, P4HB is perturbed and RHCE expression is quantified. Does this perturbation result in a significant change in RHCE expression compared with control cells? 
Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, GBF1 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to UFL1 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to DERL2 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of ARHGAP22, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb SAMM50 and monitor FCGR2A expression. 
Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PPWD1 is perturbed and CLCA1 expression is observed. Does this perturbation lead to a significant difference in CLCA1 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, BHLHE40 is perturbed and SESN2 expression is measured. Determine whether SESN2 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PDIA6 is perturbed and YTHDF2 expression is observed. Does this perturbation lead to a significant difference in YTHDF2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of PPWD1 is associated with a significant change in CD52 expression compared with unperturbed controls. 
Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: No\n", "Solutions: Yes\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, NEDD8 is perturbed and FCGR2A expression is quantified. Does this perturbation result in a significant change in FCGR2A expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SOCS1 is perturbed and RPS27 expression is measured. Determine whether RPS27 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb IER3IP1 and monitor FCER1G expression. Decide whether this perturbation leads to a significant alteration in FCER1G expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, SLMO2 is perturbed and the expression of FCGR2A is measured. Does this perturbation cause a significant change in FCGR2A expression? 
Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, CHERP is perturbed and NPL expression is quantified. Does this perturbation result in a significant change in NPL expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DAD1 and examine the expression of CLCA1. Does perturbing DAD1 lead to a significant change in CLCA1 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SLMO2 and then measure expression of PHGDH. Does this perturbation cause a significant change in PHGDH expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SEC63 is perturbed and CD52 expression is measured. Determine whether CD52 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. 
For experiments performed in K562 cells, P4HB is perturbed and RHCE expression is measured. Determine whether RHCE exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SYVN1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, ARHGAP22 is perturbed and the expression of SH3BGRL3 is measured. Does this perturbation cause a significant change in SH3BGRL3 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, STT3A is perturbed and the expression of FCER1G is measured. Determine whether FCER1G shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, ZNF326 is perturbed and ZNF326 expression is measured. Determine whether ZNF326 exhibits a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of GMPPB, does the expression profile of RHCE indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': '\"\"\"\\nUse the context from `{question}` to determine the best answer. If the answer is uncertain or conflicting, clearly indicate this. Ensure that the answer is validated against reliable sources for accuracy before proceeding. Provide your final answer in a clear format, without extra commentary or reasoning.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. 
Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:59:04.234\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.974 | Total tokens: 14560988 | Current cost: $0.001 | Current tokens: 5199\u001b[0m\n", "\u001b[32m2026-01-01 17:59:05.106\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.974 | Total tokens: 14561083 | Current cost: $0.000 | Current tokens: 95\u001b[0m\n", "\u001b[32m2026-01-01 17:59:06.266\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.974 | Total tokens: 14562858 | Current cost: $0.000 | Current tokens: 1775\u001b[0m\n", "{'name': 'validate_answer2087', 'description': 'Task to validate_answer2087. Takes answer as input. Produces validated_answer as output.', 'inputs': [{'name': 'answer', 'type': 'str', 'description': 'Input parameter answer for validate_answer2087', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_answer2087', 'required': True}], 'prompt': '\"\"\"\\nThink step by step to answer the question based on the context provided in {question}. First, validate the context to ensure accuracy and relevance. If the answer is uncertain or ambiguous, clearly state that uncertainty. After validation, generate a straightforward answer that directly addresses {question}, considering any relevant nuances. 
Format your output in XML, using to explain your reasoning and for the final response. Ensure that the answer is concise and reflects any significant changes accurately, avoiding oversimplification.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:59:08.733\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.975 | Total tokens: 14568103 | Current cost: $0.001 | Current tokens: 5245\u001b[0m\n", "\u001b[32m2026-01-01 17:59:09.646\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.975 | Total tokens: 14568208 | Current cost: $0.000 | Current tokens: 105\u001b[0m\n", "\u001b[32m2026-01-01 17:59:11.803\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.975 | Total tokens: 14571222 | Current cost: $0.001 | Current tokens: 3014\u001b[0m\n", "{'name': 'contextualize_answer4593', 'description': 'Task to contextualize_answer4593. Takes validated_answer, question as input. 
Produces final_answer as output.', 'inputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Input parameter validated_answer for contextualize_answer4593', 'required': False}, {'name': 'question', 'type': 'str', 'description': 'Input parameter question for contextualize_answer4593', 'required': False}], 'outputs': [{'name': 'final_answer', 'type': 'str', 'description': 'Output parameter final_answer from contextualize_answer4593', 'required': True}], 'prompt': '```xml\\n\"\"\"\\nTo answer the question accurately, first generate an answer based on the provided {question}. Validate the generated answer by ensuring it aligns with the context of the {question} and meets the criteria for a \"validated answer,\" which includes correctness and relevance. If the generated answer does not significantly change the context or is incorrect, indicate this in your validation. In your thought process, clarify any assumptions made and how the context relates to the answer. Ensure that the final answer is presented in the format \\'Final Answer: Yes\\' or \\'Final Answer: No\\'. Provide your reasoning in the field and the final validated answer in the field.\\n\"\"\"\\n```', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 17:59:14.404\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.976 | Total tokens: 14576455 | Current cost: $0.001 | Current tokens: 5233\u001b[0m\n", "\u001b[32m2026-01-01 17:59:15.418\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.976 | Total tokens: 14576550 | Current cost: $0.000 | Current tokens: 95\u001b[0m\n", "\u001b[32m2026-01-01 17:59:17.965\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.977 | Total tokens: 14579864 | Current cost: $0.001 | Current tokens: 3314\u001b[0m\n", "\u001b[32m2026-01-01 17:59:17.965\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1049\u001b[0m - \u001b[1mEvaluate the workflow at step 22 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:01<01:22, 1.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:02<01:05, 1.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 
[00:03<00:59, 1.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:04<00:48, 1.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:05<00:42, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:06<00:45, 1.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:07<00:41, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:08<00:37, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:09<00:36, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:09<00:32, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:10<00:35, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:11<00:34, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:12<00:34, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:14<00:39, 1.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:15<00:36, 1.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:15<00:33, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:16<00:30, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:17<00:27, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:18<00:29, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:19<00:27, 1.10it/s]" 
] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:20<00:25, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:21<00:25, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:22<00:24, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:22<00:22, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:23<00:20, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:24<00:18, 1.31it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:24<00:17, 1.35it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:25<00:15, 1.38it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:26<00:17, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:27<00:17, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:28<00:16, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:29<00:15, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:30<00:14, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:31<00:17, 1.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:32<00:14, 1.00it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:33<00:13, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
74%|███████▍ | 37/50 [00:34<00:11, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:35<00:10, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:36<00:10, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:37<00:09, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:38<00:09, 1.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:39<00:08, 1.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:40<00:06, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:40<00:05, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:42<00:04, 1.05it/s]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:42<00:03, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:43<00:02, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:44<00:01, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:45<00:00, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:46<00:00, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-01 18:00:04.675\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1052\u001b[0m - \u001b[1mStep 22 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.96}\u001b[0m\n", "randomly update dataset\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 18:00:07.425\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.983 | Total tokens: 14618573 | Current cost: $0.001 | Current tokens: 8006\u001b[0m\n", "- The workflow 
lacks a step to handle potential errors or inconsistencies in the predictions and solutions, leading to unhandled cases where the computation result is incorrect.\n", "- There is no validation step to ensure that the generated answer aligns with the strict format required ('Final Answer: Yes' or 'Final Answer: No'), which could lead to ambiguous outputs.\n", "- The control flow does not account for scenarios where the validation of the answer fails, resulting in premature termination of the workflow without addressing the issue.\n", "- The assumption that all questions can be answered with a simple 'Yes' or 'No' may not hold true for all cases, leading to potential oversights in more complex queries.\n", "\u001b[32m2026-01-01 18:00:09.545\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.983 | Total tokens: 14619272 | Current cost: $0.000 | Current tokens: 699\u001b[0m\n", "```python\n", "steps = [\n", "{'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", "{'name': 'validate_answer2087', 'args': ['answer'], 'outputs': ['validated_answer']},\n", "{'name': 'contextualize_answer4593', 'args': ['validated_answer', 'question'], 'outputs': ['final_answer']},\n", "{'name': 'error_handling', 'args': ['validated_answer'], 'outputs': ['final_answer']}\n", "]\n", "```\n", "Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SPCS3, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, ARHGAP22 is perturbed and the expression of SH3BGRL3 is measured. 
Does this perturbation cause a significant change in SH3BGRL3 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb IER3IP1 and monitor FCER1G expression. Decide whether this perturbation leads to a significant alteration in FCER1G expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, PDIA6 is perturbed and SNHG12 expression is measured. Determine whether SNHG12 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb PSMD4 and monitor PSMD4 expression. Decide whether this perturbation leads to a significant alteration in PSMD4 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SLC35B1 and examine the expression of RHCE. Does perturbing SLC35B1 lead to a significant change in RHCE expression? 
Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DDIT3 is perturbed and RPS27 expression is quantified. Does this perturbation result in a significant change in RPS27 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, ZNF326 is perturbed and ZNF326 expression is measured. Determine whether ZNF326 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb HYOU1 and examine the expression of SNHG12. Does perturbing HYOU1 lead to a significant change in SNHG12 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, IARS2 is perturbed and the expression of PHGDH is measured. Determine whether PHGDH shows a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, UFM1 is perturbed and the expression of CLCA1 is measured. Does this perturbation cause a significant change in CLCA1 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to DERL2 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of SEL1L is associated with a significant change in TXNIP expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DAD1 and examine the expression of CLCA1. Does perturbing DAD1 lead to a significant change in CLCA1 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. 
In K562 cells, TMEM167A is perturbed and the expression of CD52 is measured. Does this perturbation cause a significant change in CD52 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of COPZ1, does the expression profile of SH3BGRL3 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ZNF326 is associated with a significant change in RGS16 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb FARSB and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which AMIGO3 is perturbed and TXNIP expression is observed. Does this perturbation lead to a significant difference in TXNIP expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, KCTD16 is perturbed and the expression of SNHG12 is measured. Does this perturbation cause a significant change in SNHG12 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of COPB1, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TMED2 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb TTI1 and examine the expression of RHCE. Does perturbing TTI1 lead to a significant change in RHCE expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. 
In K562 cells, HSD17B12 is perturbed and FCGR2A expression is quantified. Does this perturbation result in a significant change in FCGR2A expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which ATP5B is perturbed and FCER1G expression is observed. Does this perturbation lead to a significant difference in FCER1G expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of GMPPB, does the expression profile of RHCE indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb SAMM50 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SRP72 and examine the expression of RPS27. Does perturbing SRP72 lead to a significant change in RPS27 expression? 
Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SAMM50, does the expression profile of FCGR2A indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which BHLHE40 is perturbed and SESN2 expression is observed. Does this perturbation lead to a significant difference in SESN2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SRP68 and then measure expression of PPCS. Does this perturbation cause a significant change in PPCS expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb DAD1 and monitor SNHG12 expression. Decide whether this perturbation leads to a significant alteration in SNHG12 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. 
Consider data from K562 cells in which IER3IP1 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DNAJC19 is perturbed and PHGDH expression is quantified. Does this perturbation result in a significant change in PHGDH expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PPWD1 is perturbed and CLCA1 expression is observed. Does this perturbation lead to a significant difference in CLCA1 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, P4HB is perturbed and RHCE expression is quantified. Does this perturbation result in a significant change in RHCE expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TTI1 is perturbed and the expression of RHCE is measured. Determine whether RHCE shows a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, GBF1 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, P4HB is perturbed and RHCE expression is measured. Determine whether RHCE exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SRP68 is perturbed and FCER1G expression is quantified. Does this perturbation result in a significant change in FCER1G expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SYVN1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. 
For experiments performed in K562 cells, CARS is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, SLMO2 is perturbed and the expression of FCGR2A is measured. Does this perturbation cause a significant change in FCGR2A expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb NEDD8 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PDIA6 is perturbed and YTHDF2 expression is observed. Does this perturbation lead to a significant difference in YTHDF2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of IER3IP1, does the expression profile of FCER1G indicate a significant change relative to control conditions? 
Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, HSD17B12 is perturbed and CD52 expression is measured. Determine whether CD52 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, SLC35B1 is perturbed and the expression of FCER1G is measured. Does this perturbation cause a significant change in FCER1G expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TIMM23 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to UFL1 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? 
Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': '\"\"\"\\nUse the context from `{question}` to determine the best answer. If the answer is uncertain or conflicting, clearly indicate this. Ensure that the answer is validated against reliable sources for accuracy before proceeding. Provide your final answer in a clear format, without extra commentary or reasoning.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. 
Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 18:00:11.877\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.984 | Total tokens: 14624485 | Current cost: $0.001 | Current tokens: 5213\u001b[0m\n", "\u001b[32m2026-01-01 18:00:13.184\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.984 | Total tokens: 14624588 | Current cost: $0.000 | Current tokens: 103\u001b[0m\n", "\u001b[32m2026-01-01 18:00:14.492\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.984 | Total tokens: 14626446 | Current cost: $0.000 | Current tokens: 1858\u001b[0m\n", "{'name': 'validate_answer2087', 'description': 'Task to validate_answer2087. Takes answer as input. Produces validated_answer as output.', 'inputs': [{'name': 'answer', 'type': 'str', 'description': 'Input parameter answer for validate_answer2087', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_answer2087', 'required': True}], 'prompt': '\"\"\"\\nThink step by step to answer the question based on the context provided in {question}. First, validate the context to ensure accuracy and relevance. If the answer is uncertain or ambiguous, clearly state that uncertainty. After validation, generate a straightforward answer that directly addresses {question}, considering any relevant nuances. 
Format your output in XML, using to explain your reasoning and for the final response. Ensure that the answer is concise and reflects any significant changes accurately, avoiding oversimplification.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 18:00:17.147\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.985 | Total tokens: 14631725 | Current cost: $0.001 | Current tokens: 5279\u001b[0m\n", "\u001b[32m2026-01-01 18:00:18.977\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.985 | Total tokens: 14631844 | Current cost: $0.000 | Current tokens: 119\u001b[0m\n", "\u001b[32m2026-01-01 18:00:22.349\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.986 | Total tokens: 14635028 | Current cost: $0.001 | Current tokens: 3184\u001b[0m\n", "{'name': 'contextualize_answer4593', 'description': 'Task to contextualize_answer4593. Takes validated_answer, question as input. 
Produces final_answer as output.', 'inputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Input parameter validated_answer for contextualize_answer4593', 'required': False}, {'name': 'question', 'type': 'str', 'description': 'Input parameter question for contextualize_answer4593', 'required': False}], 'outputs': [{'name': 'final_answer', 'type': 'str', 'description': 'Output parameter final_answer from contextualize_answer4593', 'required': True}], 'prompt': '```xml\\n\"\"\"\\nTo answer the question accurately, first generate an answer based on the provided {question}. Validate the generated answer by ensuring it aligns with the context of the {question} and meets the criteria for a \"validated answer,\" which includes correctness and relevance. If the generated answer does not significantly change the context or is incorrect, indicate this in your validation. In your thought process, clarify any assumptions made and how the context relates to the answer. Ensure that the final answer is presented in the format \\'Final Answer: Yes\\' or \\'Final Answer: No\\'. Provide your reasoning in the field and the final validated answer in the field.\\n\"\"\"\\n```', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 18:00:25.615\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.987 | Total tokens: 14640301 | Current cost: $0.001 | Current tokens: 5273\u001b[0m\n", "\u001b[32m2026-01-01 18:00:27.700\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.987 | Total tokens: 14640396 | Current cost: $0.000 | Current tokens: 95\u001b[0m\n", "\u001b[32m2026-01-01 18:00:30.603\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.987 | Total tokens: 14643876 | Current cost: $0.001 | Current tokens: 3480\u001b[0m\n", "{'name': 'error_handling3683', 'description': 'Task to error_handling3683. Takes validated_answer as input. Produces final_answer as output.', 'inputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Input parameter validated_answer for error_handling3683', 'required': False}], 'outputs': [{'name': 'final_answer', 'type': 'str', 'description': 'Output parameter final_answer from error_handling3683', 'required': True}], 'prompt': 'Your are a task solver.', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 18:00:32.921\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.988 | Total tokens: 14649130 | Current cost: $0.001 | Current tokens: 5254\u001b[0m\n", "\u001b[32m2026-01-01 18:00:34.508\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.988 | Total tokens: 14649259 | Current cost: $0.000 | Current tokens: 129\u001b[0m\n", "\u001b[32m2026-01-01 18:00:36.711\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.988 | Total tokens: 14649908 | Current cost: $0.000 | Current tokens: 649\u001b[0m\n", "\u001b[32m2026-01-01 18:00:36.712\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1049\u001b[0m - \u001b[1mEvaluate the workflow at step 23 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:01<01:14, 1.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:02<00:51, 1.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 
[00:03<00:53, 1.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:04<00:55, 1.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:05<00:49, 1.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:07<00:53, 1.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:08<00:47, 1.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:09<00:48, 1.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:10<00:46, 1.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:11<00:42, 1.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:12<00:41, 1.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:13<00:38, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:14<00:36, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:15<00:38, 1.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:16<00:35, 1.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:17<00:33, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:18<00:30, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:18<00:28, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:19<00:29, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:20<00:29, 1.03it/s]" 
] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:21<00:26, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:22<00:27, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:23<00:26, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:24<00:25, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:25<00:23, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:26<00:23, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:27<00:22, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:28<00:20, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:29<00:18, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:30<00:18, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:31<00:17, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:32<00:16, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:33<00:16, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:34<00:14, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:35<00:14, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:35<00:13, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
74%|███████▍ | 37/50 [00:37<00:13, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:37<00:11, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:39<00:11, 1.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:40<00:10, 1.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:41<00:09, 1.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:41<00:07, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:42<00:06, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:43<00:05, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:45<00:05, 1.14s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:46<00:04, 1.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:47<00:03, 1.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:48<00:02, 1.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:50<00:01, 1.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:51<00:00, 1.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-01 18:01:28.180\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1052\u001b[0m - \u001b[1mStep 23 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.92}\u001b[0m\n", "randomly update dataset\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 18:01:31.024\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.995 | Total tokens: 14688609 | Current cost: $0.001 | Current tokens: 8015\u001b[0m\n", "- The workflow 
lacks a validation step after generating the answer, which could lead to incorrect outputs being used in subsequent steps without verification.\n", "- There are multiple instances of incorrect predictions leading to incorrect solutions, indicating a failure in the computational logic or data processing that was not addressed in the workflow.\n", "- The workflow does not account for potential contradictions in the answers derived from different questions, which could lead to inconsistencies in the final outputs.\n", "- The prompts and intermediate steps do not specify the criteria for determining \"significant change,\" which may lead to ambiguity in the interpretation of results.\n", "- The control flow does not include error handling for cases where the validation fails, which could result in unhandled exceptions or incorrect final answers being presented.\n", "\u001b[32m2026-01-01 18:01:32.316\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.995 | Total tokens: 14689294 | Current cost: $0.000 | Current tokens: 685\u001b[0m\n", "```python\n", "steps = [\n", "{'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", "{'name': 'validate_answer2087', 'args': ['answer'], 'outputs': ['validated_answer']},\n", "{'name': 'contextualize_answer4593', 'args': ['validated_answer', 'question'], 'outputs': ['final_answer']}\n", "]\n", "```\n", "Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, SLC35B1 is perturbed and the expression of FCER1G is measured. Does this perturbation cause a significant change in FCER1G expression? 
Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, GBF1 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TTI1 is perturbed and the expression of CD52 is measured. Determine whether CD52 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SLC35B1 and examine the expression of RHCE. Does perturbing SLC35B1 lead to a significant change in RHCE expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, HSD17B12 is perturbed and FCGR2A expression is quantified. Does this perturbation result in a significant change in FCGR2A expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. 
Consider data from K562 cells in which SRPRB is perturbed and PPCS expression is observed. Does this perturbation lead to a significant difference in PPCS expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb HYOU1 and examine the expression of SNHG12. Does perturbing HYOU1 lead to a significant change in SNHG12 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TTI1 is perturbed and the expression of RHCE is measured. Determine whether RHCE shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, CARS is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, KCTD16 is perturbed and the expression of SNHG12 is measured. Does this perturbation cause a significant change in SNHG12 expression? 
Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ZNF326 is associated with a significant change in RGS16 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, P4HB is perturbed and RHCE expression is measured. Determine whether RHCE exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of GMPPB, does the expression profile of RHCE indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SYVN1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb PSMD4 and monitor PSMD4 expression. 
Decide whether this perturbation leads to a significant alteration in PSMD4 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which AMIGO3 is perturbed and TXNIP expression is observed. Does this perturbation lead to a significant difference in TXNIP expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SEC63 is perturbed and CD52 expression is measured. Determine whether CD52 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of MARS, does the expression profile of FAM129A indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, ARHGAP22 is perturbed and the expression of SH3BGRL3 is measured. Does this perturbation cause a significant change in SH3BGRL3 expression? 
Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TIMM23 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb MRGBP and examine the expression of RPS27. Does perturbing MRGBP lead to a significant change in RPS27 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SRP68 is perturbed and FCER1G expression is quantified. Does this perturbation result in a significant change in FCER1G expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SRP72 and examine the expression of RPS27. Does perturbing SRP72 lead to a significant change in RPS27 expression? 
Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SRPRB is perturbed and the expression of PPCS is measured. Determine whether PPCS shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SAMM50 and examine the expression of FCGR2A. Does perturbing SAMM50 lead to a significant change in FCGR2A expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IER3IP1 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb FARSB and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. 
Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SYVN1 is perturbed and YTHDF2 expression is quantified. Does this perturbation result in a significant change in YTHDF2 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IDH3A is perturbed and SNHG12 expression is observed. Does this perturbation lead to a significant difference in SNHG12 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, UFM1 is perturbed and the expression of CLCA1 is measured. Does this perturbation cause a significant change in CLCA1 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of PPWD1 is associated with a significant change in CD52 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. 
For K562 cells, assess whether perturbation of ASCC3 is associated with a significant change in SH3BGRL3 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TIMM23 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SEC63 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to MRGBP and then measure expression of RPS27. Does this perturbation cause a significant change in RPS27 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, IER3IP1 is perturbed and CLCA1 expression is measured. Determine whether CLCA1 exhibits a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DDIT3 is perturbed and RPS27 expression is quantified. Does this perturbation result in a significant change in RPS27 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SPCS3, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of IER3IP1, does the expression profile of FCER1G indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PPWD1 is perturbed and CLCA1 expression is observed. Does this perturbation lead to a significant difference in CLCA1 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. 
In K562 cells, we perturb DAD1 and monitor SNHG12 expression. Decide whether this perturbation leads to a significant alteration in SNHG12 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, STT3A is perturbed and the expression of FCER1G is measured. Determine whether FCER1G shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb SAMM50 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of COPZ1, does the expression profile of SH3BGRL3 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, NEDD8 is perturbed and FCGR2A expression is quantified. Does this perturbation result in a significant change in FCGR2A expression compared with control cells? 
Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb NEDD8 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, HSD17B12 is perturbed and CD52 expression is measured. Determine whether CD52 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb TTI1 and examine the expression of RHCE. Does perturbing TTI1 lead to a significant change in RHCE expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which ATP5B is perturbed and FCER1G expression is observed. Does this perturbation lead to a significant difference in FCER1G expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. 
In K562 cells, IARS2 is perturbed and the expression of PHGDH is measured. Determine whether PHGDH shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': '\"\"\"\\nUse the context from `{question}` to determine the best answer. If the answer is uncertain or conflicting, clearly indicate this. Ensure that the answer is validated against reliable sources for accuracy before proceeding. Provide your final answer in a clear format, without extra commentary or reasoning.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. 
Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 18:01:35.956\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.995 | Total tokens: 14694478 | Current cost: $0.001 | Current tokens: 5184\u001b[0m\n", "\u001b[32m2026-01-01 18:01:37.185\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.996 | Total tokens: 14694600 | Current cost: $0.000 | Current tokens: 122\u001b[0m\n", "\u001b[32m2026-01-01 18:01:38.800\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.996 | Total tokens: 14696520 | Current cost: $0.000 | Current tokens: 1920\u001b[0m\n", "{'name': 'validate_answer2087', 'description': 'Task to validate_answer2087. Takes answer as input. Produces validated_answer as output.', 'inputs': [{'name': 'answer', 'type': 'str', 'description': 'Input parameter answer for validate_answer2087', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_answer2087', 'required': True}], 'prompt': '\"\"\"\\nThink step by step to answer the question based on the context provided in {question}. First, validate the context to ensure accuracy and relevance. If the answer is uncertain or ambiguous, clearly state that uncertainty. After validation, generate a straightforward answer that directly addresses {question}, considering any relevant nuances. 
Format your output in XML, using to explain your reasoning and for the final response. Ensure that the answer is concise and reflects any significant changes accurately, avoiding oversimplification.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 18:01:41.269\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.997 | Total tokens: 14701747 | Current cost: $0.001 | Current tokens: 5227\u001b[0m\n", "\u001b[32m2026-01-01 18:01:43.229\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.997 | Total tokens: 14701851 | Current cost: $0.000 | Current tokens: 104\u001b[0m\n", "\u001b[32m2026-01-01 18:01:45.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.997 | Total tokens: 14705096 | Current cost: $0.001 | Current tokens: 3245\u001b[0m\n", "{'name': 'contextualize_answer4593', 'description': 'Task to contextualize_answer4593. Takes validated_answer, question as input. 
Produces final_answer as output.', 'inputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Input parameter validated_answer for contextualize_answer4593', 'required': False}, {'name': 'question', 'type': 'str', 'description': 'Input parameter question for contextualize_answer4593', 'required': False}], 'outputs': [{'name': 'final_answer', 'type': 'str', 'description': 'Output parameter final_answer from contextualize_answer4593', 'required': True}], 'prompt': '```xml\\n\"\"\"\\nTo answer the question accurately, first generate an answer based on the provided {question}. Validate the generated answer by ensuring it aligns with the context of the {question} and meets the criteria for a \"validated answer,\" which includes correctness and relevance. If the generated answer does not significantly change the context or is incorrect, indicate this in your validation. In your thought process, clarify any assumptions made and how the context relates to the answer. Ensure that the final answer is presented in the format \\'Final Answer: Yes\\' or \\'Final Answer: No\\'. Provide your reasoning in the field and the final validated answer in the field.\\n\"\"\"\\n```', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 18:01:48.393\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.998 | Total tokens: 14710317 | Current cost: $0.001 | Current tokens: 5221\u001b[0m\n", "\u001b[32m2026-01-01 18:01:49.291\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.998 | Total tokens: 14710417 | Current cost: $0.000 | Current tokens: 100\u001b[0m\n", "\u001b[32m2026-01-01 18:01:51.961\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $2.999 | Total tokens: 14713990 | Current cost: $0.001 | Current tokens: 3573\u001b[0m\n", "\u001b[32m2026-01-01 18:01:51.962\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1049\u001b[0m - \u001b[1mEvaluate the workflow at step 24 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:01<01:10, 1.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:02<00:47, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 
[00:02<00:40, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:04<01:03, 1.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:07<01:12, 1.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:08<01:09, 1.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:11<01:20, 1.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:11<01:05, 1.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:12<00:57, 1.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:13<00:47, 1.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:14<00:43, 1.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:15<00:39, 1.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:16<00:38, 1.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:17<00:33, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:18<00:33, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:19<00:32, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:20<00:32, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:21<00:30, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:21<00:28, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:22<00:27, 1.07it/s]" 
] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:23<00:26, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:24<00:27, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:25<00:26, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:27<00:33, 1.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:28<00:30, 1.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:30<00:30, 1.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:32<00:32, 1.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:33<00:27, 1.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:35<00:31, 1.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:36<00:26, 1.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:37<00:23, 1.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:37<00:19, 1.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:39<00:19, 1.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:40<00:18, 1.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:41<00:16, 1.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:42<00:15, 1.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
74%|███████▍ | 37/50 [00:43<00:15, 1.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:44<00:13, 1.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:45<00:11, 1.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:46<00:09, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:47<00:09, 1.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:49<00:09, 1.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:50<00:08, 1.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:51<00:06, 1.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:52<00:05, 1.08s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:53<00:03, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:53<00:02, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:55<00:02, 1.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:55<00:00, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:56<00:00, 1.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-01 18:02:48.863\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1052\u001b[0m - \u001b[1mStep 24 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.96}\u001b[0m\n", "randomly update dataset\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 18:02:51.807\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.005 | Total tokens: 14752644 | Current cost: $0.001 | Current tokens: 8009\u001b[0m\n", "- The workflow 
lacks a validation step before generating the answer, which could lead to incorrect outputs being processed further.\n", "- The validation step is not clearly defined, which raises concerns about its effectiveness and reliability in ensuring the correctness of the answer.\n", "- There is no mechanism to handle cases where the answer cannot be determined or is ambiguous, leading to potential premature termination of the workflow.\n", "- The workflow assumes that all questions can be answered with a binary response ('Yes' or 'No'), which may not be applicable for all scenarios, potentially leading to misleading conclusions.\n", "- The execution history shows multiple instances where the predicted answers were incorrect, indicating a failure in the underlying model or data used for generating predictions.\n", "\u001b[32m2026-01-01 18:02:53.810\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.005 | Total tokens: 14753323 | Current cost: $0.000 | Current tokens: 679\u001b[0m\n", "```python\n", "steps = [\n", "{'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", "{'name': 'validate_answer2087', 'args': ['answer'], 'outputs': ['validated_answer']},\n", "{'name': 'contextualize_answer4593', 'args': ['validated_answer', 'question'], 'outputs': ['final_answer']}\n", "]\n", "```\n", "Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ASCC3 is associated with a significant change in SH3BGRL3 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. 
For K562 cells subjected to perturbation of ARHGAP22, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, EIF2B2 is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, IARS2 is perturbed and the expression of PHGDH is measured. Determine whether PHGDH shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SRPRB is perturbed and the expression of PPCS is measured. Determine whether PPCS shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, P4HB is perturbed and RHCE expression is measured. Determine whether RHCE exhibits a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of GMPPB, does the expression profile of RHCE indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SYVN1 is perturbed and YTHDF2 expression is quantified. Does this perturbation result in a significant change in YTHDF2 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of IER3IP1, does the expression profile of FCER1G indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb NEDD8 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. 
In K562 cells, we introduce a perturbation to TIMM23 and then measure expression of SH3BGRL3. Does this perturbation cause a significant change in SH3BGRL3 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IDH3A is perturbed and SNHG12 expression is observed. Does this perturbation lead to a significant difference in SNHG12 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to DERL2 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, TMEM167A is perturbed and the expression of CD52 is measured. Does this perturbation cause a significant change in CD52 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SCYL1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? 
Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SPCS2 is perturbed and the expression of SH3BGRL3 is measured. Determine whether SH3BGRL3 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: No\n", "Solutions: Yes\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, ZNF326 is perturbed and ZNF326 expression is measured. Determine whether ZNF326 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SYVN1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, CHERP is perturbed and NPL expression is quantified. Does this perturbation result in a significant change in NPL expression compared with control cells? 
Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to UFL1 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which ATP5B is perturbed and FCER1G expression is observed. Does this perturbation lead to a significant difference in FCER1G expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of MARS, does the expression profile of FAM129A indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of PPWD1 is associated with a significant change in CD52 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. 
For experiments performed in K562 cells, CARS is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which AMIGO3 is perturbed and TXNIP expression is observed. Does this perturbation lead to a significant difference in TXNIP expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TTI1 is perturbed and the expression of CD52 is measured. Determine whether CD52 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, BHLHE40 is perturbed and SESN2 expression is measured. Determine whether SESN2 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SRP68 is perturbed and FCER1G expression is quantified. Does this perturbation result in a significant change in FCER1G expression compared with control cells? 
Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb MRGBP and examine the expression of RPS27. Does perturbing MRGBP lead to a significant change in RPS27 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, ARHGAP22 is perturbed and the expression of SH3BGRL3 is measured. Does this perturbation cause a significant change in SH3BGRL3 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, P4HB is perturbed and RHCE expression is quantified. Does this perturbation result in a significant change in RHCE expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of TELO2, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. 
For K562 cells, assess whether perturbation of SEL1L is associated with a significant change in TXNIP expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to CARS and then measure expression of FCGR2A. Does this perturbation cause a significant change in FCGR2A expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, HSD17B12 is perturbed and CD52 expression is measured. Determine whether CD52 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, SLC35B1 is perturbed and the expression of FCER1G is measured. Does this perturbation cause a significant change in FCER1G expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, UFM1 is perturbed and the expression of CLCA1 is measured. Does this perturbation cause a significant change in CLCA1 expression? 
Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SAMM50, does the expression profile of FCGR2A indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, IER3IP1 is perturbed and CLCA1 expression is measured. Determine whether CLCA1 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of HARS, does the expression profile of PHGDH indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PDIA6 is perturbed and YTHDF2 expression is observed. Does this perturbation lead to a significant difference in YTHDF2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. 
For experiments carried out in K562 cells, we perturb DDIT3 and examine the expression of RGS16. Does perturbing DDIT3 lead to a significant change in RGS16 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb PSMD4 and monitor PSMD4 expression. Decide whether this perturbation leads to a significant alteration in PSMD4 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb TTI1 and examine the expression of RHCE. Does perturbing TTI1 lead to a significant change in RHCE expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SEC63 is perturbed and CD52 expression is measured. Determine whether CD52 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb FARSB and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. 
Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of COPZ1, does the expression profile of SH3BGRL3 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PPWD1 is perturbed and CLCA1 expression is observed. Does this perturbation lead to a significant difference in CLCA1 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ZNF326 is associated with a significant change in RGS16 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DAD1 and examine the expression of CLCA1. Does perturbing DAD1 lead to a significant change in CLCA1 expression? 
Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': '\"\"\"\\nUse the context from `{question}` to determine the best answer. If the answer is uncertain or conflicting, clearly indicate this. Ensure that the answer is validated against reliable sources for accuracy before proceeding. Provide your final answer in a clear format, without extra commentary or reasoning.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 18:02:57.165\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.006 | Total tokens: 14758464 | Current cost: $0.001 | Current tokens: 5141\u001b[0m\n", "\u001b[32m2026-01-01 18:02:58.213\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.006 | Total tokens: 14758569 | Current cost: $0.000 | Current tokens: 105\u001b[0m\n", "\u001b[32m2026-01-01 18:02:59.410\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.006 | Total tokens: 14760538 | Current cost: $0.000 | Current tokens: 1969\u001b[0m\n", "{'name': 'validate_answer2087', 'description': 'Task to validate_answer2087. Takes answer as input. Produces validated_answer as output.', 'inputs': [{'name': 'answer', 'type': 'str', 'description': 'Input parameter answer for validate_answer2087', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_answer2087', 'required': True}], 'prompt': '\"\"\"\\nThink step by step to answer the question based on the context provided in {question}. First, validate the context to ensure accuracy and relevance. If the answer is uncertain or ambiguous, clearly state that uncertainty. After validation, generate a straightforward answer that directly addresses {question}, considering any relevant nuances. Format your output in XML, using to explain your reasoning and for the final response. Ensure that the answer is concise and reflects any significant changes accurately, avoiding oversimplification.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 18:03:03.185\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.007 | Total tokens: 14765697 | Current cost: $0.001 | Current tokens: 5159\u001b[0m\n", "\u001b[32m2026-01-01 18:03:04.215\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.007 | Total tokens: 14765800 | Current cost: $0.000 | Current tokens: 103\u001b[0m\n", "\u001b[32m2026-01-01 18:03:06.276\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.008 | Total tokens: 14769144 | Current cost: $0.001 | Current tokens: 3344\u001b[0m\n", "{'name': 'contextualize_answer4593', 'description': 'Task to contextualize_answer4593. Takes validated_answer, question as input. Produces final_answer as output.', 'inputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Input parameter validated_answer for contextualize_answer4593', 'required': False}, {'name': 'question', 'type': 'str', 'description': 'Input parameter question for contextualize_answer4593', 'required': False}], 'outputs': [{'name': 'final_answer', 'type': 'str', 'description': 'Output parameter final_answer from contextualize_answer4593', 'required': True}], 'prompt': '```xml\\n\"\"\"\\nTo answer the question accurately, first generate an answer based on the provided {question}. 
Validate the generated answer by ensuring it aligns with the context of the {question} and meets the criteria for a \"validated answer,\" which includes correctness and relevance. If the generated answer does not significantly change the context or is incorrect, indicate this in your validation. In your thought process, clarify any assumptions made and how the context relates to the answer. Ensure that the final answer is presented in the format \\'Final Answer: Yes\\' or \\'Final Answer: No\\'. Provide your reasoning in the field and the final validated answer in the field.\\n\"\"\"\\n```', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 18:03:09.265\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.008 | Total tokens: 14774320 | Current cost: $0.001 | Current tokens: 5176\u001b[0m\n", "\u001b[32m2026-01-01 18:03:10.806\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.009 | Total tokens: 14774417 | Current cost: $0.000 | Current tokens: 97\u001b[0m\n", "\u001b[32m2026-01-01 18:03:13.031\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.009 | Total tokens: 14778122 | Current cost: $0.001 | Current tokens: 3705\u001b[0m\n", 
"\u001b[32m2026-01-01 18:03:13.031\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1049\u001b[0m - \u001b[1mEvaluate the workflow at step 25 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:01<01:30, 1.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:02<01:02, 1.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 [00:05<01:28, 1.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:07<01:24, 1.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:07<01:03, 1.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:08<00:55, 1.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:09<00:47, 1.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:10<00:43, 1.03s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:11<00:41, 1.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:12<00:38, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:13<00:38, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:14<00:35, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:15<00:35, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:16<00:35, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:17<00:34, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:17<00:30, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:19<00:33, 1.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:19<00:31, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:20<00:27, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:22<00:35, 1.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:24<00:37, 1.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:25<00:33, 1.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:26<00:31, 1.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:27<00:29, 1.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:28<00:26, 1.07s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:29<00:24, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:29<00:22, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:30<00:20, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:31<00:18, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:32<00:20, 1.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:33<00:19, 1.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:35<00:19, 1.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:36<00:22, 1.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": 
"stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:37<00:18, 1.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:38<00:16, 1.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:40<00:16, 1.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 37/50 [00:40<00:14, 1.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:41<00:12, 1.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:42<00:10, 1.00it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:43<00:09, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:44<00:07, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 
42/50 [00:45<00:07, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:46<00:06, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:47<00:05, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:47<00:04, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:48<00:03, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:50<00:02, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:50<00:01, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:52<00:01, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:53<00:00, 1.06s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-01 18:04:06.066\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1052\u001b[0m - \u001b[1mStep 25 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.92}\u001b[0m\n", "randomly update dataset\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 18:04:09.589\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.015 | Total tokens: 14816803 | Current cost: $0.001 | Current tokens: 8017\u001b[0m\n", "- The workflow lacks a validation step to ensure that the generated answer aligns with the specific format required ('Final Answer: Yes' or 'Final Answer: No'), which could lead to incorrect outputs.\n", "- There are multiple instances of incorrect computation results leading to mismatches between predictions and solutions, indicating potential flaws in the answer generation or validation processes.\n", "- The workflow does not account for the possibility of ambiguous or contradictory questions, which could result in misleading answers.\n", "- The control flow does not include error handling for cases where the validation fails, leading to unaddressed discrepancies in the output.\n", "- The assumption that all questions can be answered with a binary response may not hold true for all experimental contexts, potentially oversimplifying complex biological scenarios.\n", "\u001b[32m2026-01-01 18:04:13.622\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.015 | Total tokens: 14817490 | Current cost: $0.000 | Current tokens: 687\u001b[0m\n", "```python\n", "steps = [\n", "{'name': 'generate_answer', 'args': 
['question'], 'outputs': ['answer']},\n", "{'name': 'validate_answer2087', 'args': ['answer'], 'outputs': ['validated_answer']},\n", "{'name': 'contextualize_answer4593', 'args': ['validated_answer', 'question'], 'outputs': ['final_answer']}\n", "]\n", "```\n", "Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to MRGBP and then measure expression of RPS27. Does this perturbation cause a significant change in RPS27 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which BHLHE40 is perturbed and SESN2 expression is observed. Does this perturbation lead to a significant difference in SESN2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of HARS, does the expression profile of PHGDH indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, ARHGAP22 is perturbed and the expression of SH3BGRL3 is measured. Does this perturbation cause a significant change in SH3BGRL3 expression? 
Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb MRGBP and examine the expression of RPS27. Does perturbing MRGBP lead to a significant change in RPS27 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IER3IP1 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TTI1 is perturbed and the expression of RHCE is measured. Determine whether RHCE shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SEC63 is perturbed and CD52 expression is measured. Determine whether CD52 exhibits a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SRP72 and examine the expression of RPS27. Does perturbing SRP72 lead to a significant change in RPS27 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb IER3IP1 and monitor FCER1G expression. Decide whether this perturbation leads to a significant alteration in FCER1G expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, TMEM167A is perturbed and the expression of CD52 is measured. Does this perturbation cause a significant change in CD52 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb TTI1 and examine the expression of RHCE. Does perturbing TTI1 lead to a significant change in RHCE expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. 
For K562 cells, assess whether perturbation of ASCC3 is associated with a significant change in SH3BGRL3 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PDIA6 is perturbed and YTHDF2 expression is observed. Does this perturbation lead to a significant difference in YTHDF2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SYVN1 is perturbed and YTHDF2 expression is quantified. Does this perturbation result in a significant change in YTHDF2 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SRPRB is perturbed and the expression of PPCS is measured. Determine whether PPCS shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of GMPPB, does the expression profile of RHCE indicate a significant change relative to control conditions? 
Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, GBF1 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DAD1 and examine the expression of CLCA1. Does perturbing DAD1 lead to a significant change in CLCA1 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of IER3IP1, does the expression profile of FCER1G indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, CARS is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. 
In K562 cells, SLMO2 is perturbed and the expression of FCGR2A is measured. Does this perturbation cause a significant change in FCGR2A expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb DAD1 and monitor SNHG12 expression. Decide whether this perturbation leads to a significant alteration in SNHG12 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of COPZ1, does the expression profile of SH3BGRL3 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb FARSB and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to DERL2 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? 
Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of COPB1, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, P4HB is perturbed and RHCE expression is quantified. Does this perturbation result in a significant change in RHCE expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IDH3A is perturbed and SNHG12 expression is observed. Does this perturbation lead to a significant difference in SNHG12 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which ATP5B is perturbed and FCER1G expression is observed. Does this perturbation lead to a significant difference in FCER1G expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to UFL1 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of MARS, does the expression profile of FAM129A indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SLMO2 and then measure expression of PHGDH. Does this perturbation cause a significant change in PHGDH expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SPCS3, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. 
In K562 cells, CHERP is perturbed and NPL expression is quantified. Does this perturbation result in a significant change in NPL expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which AMIGO3 is perturbed and TXNIP expression is observed. Does this perturbation lead to a significant difference in TXNIP expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb SAMM50 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, ZNF326 is perturbed and ZNF326 expression is measured. Determine whether ZNF326 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ZNF326 is associated with a significant change in RGS16 expression compared with unperturbed controls. 
Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to CARS and then measure expression of FCGR2A. Does this perturbation cause a significant change in FCGR2A expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SLC35B1 and examine the expression of RHCE. Does perturbing SLC35B1 lead to a significant change in RHCE expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SRP68 is perturbed and FCER1G expression is quantified. Does this perturbation result in a significant change in FCER1G expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SYVN1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. 
In K562 cells, STT3A is perturbed and the expression of FCER1G is measured. Determine whether FCER1G shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SOCS1 is perturbed and RPS27 expression is measured. Determine whether RPS27 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SAMM50, does the expression profile of FCGR2A indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of SEL1L is associated with a significant change in TXNIP expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, PDIA6 is perturbed and SNHG12 expression is measured. Determine whether SNHG12 exhibits a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SRP68 and then measure expression of PPCS. Does this perturbation cause a significant change in PPCS expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, NEDD8 is perturbed and FCGR2A expression is quantified. Does this perturbation result in a significant change in FCGR2A expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': '\"\"\"\\nUse the context from `{question}` to determine the best answer. If the answer is uncertain or conflicting, clearly indicate this. Ensure that the answer is validated against reliable sources for accuracy before proceeding. Provide your final answer in a clear format, without extra commentary or reasoning.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. 
Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 18:04:16.941\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.016 | Total tokens: 14822663 | Current cost: $0.001 | Current tokens: 5173\u001b[0m\n", "\u001b[32m2026-01-01 18:04:18.747\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.016 | Total tokens: 14822764 | Current cost: $0.000 | Current tokens: 101\u001b[0m\n", "\u001b[32m2026-01-01 18:04:20.020\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.017 | Total tokens: 14824794 | Current cost: $0.000 | Current tokens: 2030\u001b[0m\n", "{'name': 'validate_answer2087', 'description': 'Task to validate_answer2087. Takes answer as input. Produces validated_answer as output.', 'inputs': [{'name': 'answer', 'type': 'str', 'description': 'Input parameter answer for validate_answer2087', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_answer2087', 'required': True}], 'prompt': '\"\"\"\\nThink step by step to answer the question based on the context provided in {question}. First, validate the context to ensure accuracy and relevance. If the answer is uncertain or ambiguous, clearly state that uncertainty. After validation, generate a straightforward answer that directly addresses {question}, considering any relevant nuances. 
Format your output in XML, using to explain your reasoning and for the final response. Ensure that the answer is concise and reflects any significant changes accurately, avoiding oversimplification.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 18:04:22.863\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.018 | Total tokens: 14830014 | Current cost: $0.001 | Current tokens: 5220\u001b[0m\n", "\u001b[32m2026-01-01 18:04:24.159\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.018 | Total tokens: 14830118 | Current cost: $0.000 | Current tokens: 104\u001b[0m\n", "\u001b[32m2026-01-01 18:04:27.186\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.018 | Total tokens: 14833619 | Current cost: $0.001 | Current tokens: 3501\u001b[0m\n", "{'name': 'contextualize_answer4593', 'description': 'Task to contextualize_answer4593. Takes validated_answer, question as input. 
Produces final_answer as output.', 'inputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Input parameter validated_answer for contextualize_answer4593', 'required': False}, {'name': 'question', 'type': 'str', 'description': 'Input parameter question for contextualize_answer4593', 'required': False}], 'outputs': [{'name': 'final_answer', 'type': 'str', 'description': 'Output parameter final_answer from contextualize_answer4593', 'required': True}], 'prompt': '```xml\\n\"\"\"\\nTo answer the question accurately, first generate an answer based on the provided {question}. Validate the generated answer by ensuring it aligns with the context of the {question} and meets the criteria for a \"validated answer,\" which includes correctness and relevance. If the generated answer does not significantly change the context or is incorrect, indicate this in your validation. In your thought process, clarify any assumptions made and how the context relates to the answer. Ensure that the final answer is presented in the format \\'Final Answer: Yes\\' or \\'Final Answer: No\\'. Provide your reasoning in the field and the final validated answer in the field.\\n\"\"\"\\n```', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 18:04:30.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.019 | Total tokens: 14838850 | Current cost: $0.001 | Current tokens: 5231\u001b[0m\n", "\u001b[32m2026-01-01 18:04:31.707\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.019 | Total tokens: 14838958 | Current cost: $0.000 | Current tokens: 108\u001b[0m\n", "\u001b[32m2026-01-01 18:04:36.876\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.020 | Total tokens: 14842833 | Current cost: $0.001 | Current tokens: 3875\u001b[0m\n", "\u001b[32m2026-01-01 18:04:36.877\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1049\u001b[0m - \u001b[1mEvaluate the workflow at step 26 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:00<00:40, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:01<00:36, 1.32it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 
[00:02<00:38, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:03<00:35, 1.28it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:04<00:37, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:04<00:33, 1.30it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:05<00:36, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:06<00:33, 1.25it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:07<00:37, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:08<00:35, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:09<00:33, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:10<00:32, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:11<00:34, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:12<00:33, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:12<00:31, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:14<00:40, 1.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:16<00:47, 1.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:17<00:42, 1.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:19<00:44, 1.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:20<00:40, 1.36s/it]" 
] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:21<00:35, 1.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:22<00:31, 1.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:25<00:42, 1.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:26<00:38, 1.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:27<00:32, 1.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:28<00:28, 1.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:30<00:32, 1.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:32<00:34, 1.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:33<00:29, 1.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:34<00:25, 1.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:34<00:20, 1.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:36<00:21, 1.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:37<00:20, 1.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:38<00:19, 1.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:39<00:18, 1.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:40<00:15, 1.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
74%|███████▍ | 37/50 [00:41<00:13, 1.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:42<00:11, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:43<00:10, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:44<00:09, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:45<00:08, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:46<00:08, 1.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:48<00:08, 1.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:49<00:06, 1.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:50<00:05, 1.16s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:52<00:05, 1.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:53<00:04, 1.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:54<00:02, 1.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:55<00:01, 1.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:56<00:00, 1.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-01 18:05:33.468\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1052\u001b[0m - \u001b[1mStep 26 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.94}\u001b[0m\n", "randomly update dataset\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 18:05:36.979\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.026 | Total tokens: 14881558 | Current cost: $0.001 | Current tokens: 8033\u001b[0m\n", "- The workflow 
lacks a validation step after generating the answer, which could lead to incorrect outputs being used in subsequent steps without verification.\n", "- There is no explicit handling of cases where the generated answer may not conform to the required format ('Final Answer: Yes' or 'Final Answer: No'), which could result in ambiguous or incorrect responses.\n", "- The control flow does not account for potential errors in the validation step, which could lead to the propagation of incorrect answers without any corrective action.\n", "- The execution history shows multiple instances where the predicted answers were correct, but the solutions were marked incorrect, indicating a potential flaw in the validation logic or criteria used for correctness assessment.\n", "- The workflow does not specify how to handle conflicting results between predictions and solutions, which could lead to confusion or misinterpretation of the final answer.\n", "\u001b[32m2026-01-01 18:05:38.403\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.026 | Total tokens: 14882261 | Current cost: $0.000 | Current tokens: 703\u001b[0m\n", "```python\n", "steps = [\n", "{'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", "{'name': 'validate_answer2087', 'args': ['answer'], 'outputs': ['validated_answer']},\n", "{'name': 'contextualize_answer4593', 'args': ['validated_answer', 'question'], 'outputs': ['final_answer']}\n", "]\n", "```\n", "Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, HSD17B12 is perturbed and CD52 expression is measured. Determine whether CD52 exhibits a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SAMM50 and examine the expression of FCGR2A. Does perturbing SAMM50 lead to a significant change in FCGR2A expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, IER3IP1 is perturbed and CLCA1 expression is measured. Determine whether CLCA1 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IDH3A is perturbed and SNHG12 expression is observed. Does this perturbation lead to a significant difference in SNHG12 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TIMM23 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. 
For experiments carried out in K562 cells, we perturb TTI1 and examine the expression of RHCE. Does perturbing TTI1 lead to a significant change in RHCE expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of COPB1, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of GMPPB, does the expression profile of RHCE indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to MRGBP and then measure expression of RPS27. Does this perturbation cause a significant change in RPS27 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, CARS is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SPCS2 is perturbed and the expression of SH3BGRL3 is measured. Determine whether SH3BGRL3 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb IER3IP1 and monitor FCER1G expression. Decide whether this perturbation leads to a significant alteration in FCER1G expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SYVN1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to CARS and then measure expression of FCGR2A. Does this perturbation cause a significant change in FCGR2A expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. 
In K562 cells, TTI1 is perturbed and the expression of CD52 is measured. Determine whether CD52 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb FARSB and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb DAD1 and monitor SNHG12 expression. Decide whether this perturbation leads to a significant alteration in SNHG12 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, SLC35B1 is perturbed and the expression of FCER1G is measured. Does this perturbation cause a significant change in FCER1G expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to TIMM23 and then measure expression of SH3BGRL3. Does this perturbation cause a significant change in SH3BGRL3 expression? 
Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, UFM1 is perturbed and the expression of CLCA1 is measured. Does this perturbation cause a significant change in CLCA1 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, P4HB is perturbed and RHCE expression is quantified. Does this perturbation result in a significant change in RHCE expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TIMM23 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, PDIA6 is perturbed and SNHG12 expression is measured. Determine whether SNHG12 exhibits a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb SAMM50 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SRP72 and examine the expression of RPS27. Does perturbing SRP72 lead to a significant change in RPS27 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TTI1 is perturbed and the expression of RHCE is measured. Determine whether RHCE shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which SRPRB is perturbed and PPCS expression is observed. Does this perturbation lead to a significant difference in PPCS expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. 
For K562 cells subjected to perturbation of IER3IP1, does the expression profile of FCER1G indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, CHERP is perturbed and NPL expression is quantified. Does this perturbation result in a significant change in NPL expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which BHLHE40 is perturbed and SESN2 expression is observed. Does this perturbation lead to a significant difference in SESN2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, ZNF326 is perturbed and ZNF326 expression is measured. Determine whether ZNF326 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PPWD1 is perturbed and CLCA1 expression is observed. Does this perturbation lead to a significant difference in CLCA1 expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to UFL1 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb NEDD8 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of COPZ1, does the expression profile of SH3BGRL3 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IER3IP1 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. 
For experiments performed in K562 cells, BHLHE40 is perturbed and SESN2 expression is measured. Determine whether SESN2 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SEC63 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of TELO2, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DDIT3 and examine the expression of RGS16. Does perturbing DDIT3 lead to a significant change in RGS16 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of PPWD1 is associated with a significant change in CD52 expression compared with unperturbed controls. 
Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of SEL1L is associated with a significant change in TXNIP expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, IARS2 is perturbed and the expression of PHGDH is measured. Determine whether PHGDH shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SLMO2 and then measure expression of PHGDH. Does this perturbation cause a significant change in PHGDH expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DDIT3 is perturbed and RPS27 expression is quantified. Does this perturbation result in a significant change in RPS27 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in regulatory genomics. 
Consider data from K562 cells in which AMIGO3 is perturbed and TXNIP expression is observed. Does this perturbation lead to a significant difference in TXNIP expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SYVN1 is perturbed and YTHDF2 expression is quantified. Does this perturbation result in a significant change in YTHDF2 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, TMEM167A is perturbed and the expression of CD52 is measured. Does this perturbation cause a significant change in CD52 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to DERL2 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which ATP5B is perturbed and FCER1G expression is observed. Does this perturbation lead to a significant difference in FCER1G expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': '\"\"\"\\nUse the context from `{question}` to determine the best answer. If the answer is uncertain or conflicting, clearly indicate this. Ensure that the answer is validated against reliable sources for accuracy before proceeding. Provide your final answer in a clear format, without extra commentary or reasoning.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. 
Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 18:05:41.594\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.027 | Total tokens: 14887453 | Current cost: $0.001 | Current tokens: 5192\u001b[0m\n", "\u001b[32m2026-01-01 18:05:42.769\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.027 | Total tokens: 14887572 | Current cost: $0.000 | Current tokens: 119\u001b[0m\n", "\u001b[32m2026-01-01 18:05:44.394\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.027 | Total tokens: 14889675 | Current cost: $0.000 | Current tokens: 2103\u001b[0m\n", "{'name': 'validate_answer2087', 'description': 'Task to validate_answer2087. Takes answer as input. Produces validated_answer as output.', 'inputs': [{'name': 'answer', 'type': 'str', 'description': 'Input parameter answer for validate_answer2087', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_answer2087', 'required': True}], 'prompt': '\"\"\"\\nThink step by step to answer the question based on the context provided in {question}. First, validate the context to ensure accuracy and relevance. If the answer is uncertain or ambiguous, clearly state that uncertainty. After validation, generate a straightforward answer that directly addresses {question}, considering any relevant nuances. 
Format your output in XML, using to explain your reasoning and for the final response. Ensure that the answer is concise and reflects any significant changes accurately, avoiding oversimplification.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 18:05:47.273\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.028 | Total tokens: 14894899 | Current cost: $0.001 | Current tokens: 5224\u001b[0m\n", "\u001b[32m2026-01-01 18:05:48.129\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.028 | Total tokens: 14894995 | Current cost: $0.000 | Current tokens: 96\u001b[0m\n", "\u001b[32m2026-01-01 18:05:51.561\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.029 | Total tokens: 14898587 | Current cost: $0.001 | Current tokens: 3592\u001b[0m\n", "{'name': 'contextualize_answer4593', 'description': 'Task to contextualize_answer4593. Takes validated_answer, question as input. 
Produces final_answer as output.', 'inputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Input parameter validated_answer for contextualize_answer4593', 'required': False}, {'name': 'question', 'type': 'str', 'description': 'Input parameter question for contextualize_answer4593', 'required': False}], 'outputs': [{'name': 'final_answer', 'type': 'str', 'description': 'Output parameter final_answer from contextualize_answer4593', 'required': True}], 'prompt': '```xml\\n\"\"\"\\nTo answer the question accurately, first generate an answer based on the provided {question}. Validate the generated answer by ensuring it aligns with the context of the {question} and meets the criteria for a \"validated answer,\" which includes correctness and relevance. If the generated answer does not significantly change the context or is incorrect, indicate this in your validation. In your thought process, clarify any assumptions made and how the context relates to the answer. Ensure that the final answer is presented in the format \\'Final Answer: Yes\\' or \\'Final Answer: No\\'. Provide your reasoning in the field and the final validated answer in the field.\\n\"\"\"\\n```', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 18:05:55.076\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.030 | Total tokens: 14903815 | Current cost: $0.001 | Current tokens: 5228\u001b[0m\n", "\u001b[32m2026-01-01 18:05:56.257\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.030 | Total tokens: 14903923 | Current cost: $0.000 | Current tokens: 108\u001b[0m\n", "\u001b[32m2026-01-01 18:05:59.778\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.030 | Total tokens: 14907900 | Current cost: $0.001 | Current tokens: 3977\u001b[0m\n", "\u001b[32m2026-01-01 18:05:59.778\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1049\u001b[0m - \u001b[1mEvaluate the workflow at step 27 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:00<00:38, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:01<00:40, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 
[00:02<00:38, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:03<00:37, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:04<00:36, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:04<00:35, 1.25it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:05<00:34, 1.25it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:06<00:36, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:07<00:40, 1.00it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:08<00:36, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:10<00:43, 1.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:11<00:41, 1.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:12<00:38, 1.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:13<00:35, 1.00it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:14<00:42, 1.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:16<00:44, 1.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:17<00:41, 1.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:18<00:38, 1.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:19<00:33, 1.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:20<00:32, 1.10s/it]" 
] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:21<00:32, 1.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:22<00:29, 1.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:23<00:28, 1.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:24<00:25, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:26<00:30, 1.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:27<00:27, 1.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:27<00:23, 1.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:28<00:22, 1.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:29<00:21, 1.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:30<00:19, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:31<00:17, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:32<00:16, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:34<00:19, 1.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:34<00:16, 1.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:35<00:15, 1.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:36<00:12, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
74%|███████▍ | 37/50 [00:37<00:11, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:38<00:10, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:39<00:10, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:40<00:09, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:42<00:12, 1.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:44<00:11, 1.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:44<00:08, 1.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:46<00:07, 1.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:47<00:05, 1.15s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:48<00:04, 1.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:48<00:03, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:49<00:01, 1.00it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:51<00:01, 1.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:52<00:00, 1.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-01 18:06:52.069\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1052\u001b[0m - \u001b[1mStep 27 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.94}\u001b[0m\n", "randomly update dataset\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 18:06:55.470\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.036 | Total tokens: 14946586 | Current cost: $0.001 | Current tokens: 8001\u001b[0m\n", "- The workflow 
lacks a validation step to ensure that the generated answer aligns with the expected format ('Final Answer: Yes' or 'Final Answer: No') before proceeding to contextualization, which could lead to incorrect outputs.\n", "- There are multiple instances of incorrect computation results leading to discrepancies between predictions and solutions, indicating potential flaws in the answer generation or validation logic.\n", "- The workflow does not account for the possibility of ambiguous or contradictory questions, which may lead to misinterpretation of the required answer format.\n", "- The execution history shows several cases where the final answer was marked as correct despite the underlying computations being incorrect, suggesting inadequate error handling or validation mechanisms.\n", "\u001b[32m2026-01-01 18:06:57.242\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.037 | Total tokens: 14947257 | Current cost: $0.000 | Current tokens: 671\u001b[0m\n", "```python\n", "steps = [\n", "{'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", "{'name': 'validate_answer2087', 'args': ['answer'], 'outputs': ['validated_answer']},\n", "{'name': 'contextualize_answer4593', 'args': ['validated_answer', 'question'], 'outputs': ['final_answer']}\n", "]\n", "```\n", "Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SEC63 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DDIT3 and examine the expression of RGS16. 
Does perturbing DDIT3 lead to a significant change in RGS16 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb MRGBP and examine the expression of RPS27. Does perturbing MRGBP lead to a significant change in RPS27 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, HSD17B12 is perturbed and CD52 expression is measured. Determine whether CD52 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of MARS, does the expression profile of FAM129A indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IER3IP1 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, CHERP is perturbed and NPL expression is quantified. Does this perturbation result in a significant change in NPL expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SLC35B1 and examine the expression of RHCE. Does perturbing SLC35B1 lead to a significant change in RHCE expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DDIT3 is perturbed and RPS27 expression is quantified. Does this perturbation result in a significant change in RPS27 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb HYOU1 and examine the expression of SNHG12. Does perturbing HYOU1 lead to a significant change in SNHG12 expression? 
Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, NEDD8 is perturbed and FCGR2A expression is quantified. Does this perturbation result in a significant change in FCGR2A expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which AMIGO3 is perturbed and TXNIP expression is observed. Does this perturbation lead to a significant difference in TXNIP expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ASCC3 is associated with a significant change in SH3BGRL3 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, STT3A is perturbed and the expression of FCER1G is measured. Determine whether FCER1G shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. 
For K562 cells, assess whether perturbation of SEL1L is associated with a significant change in TXNIP expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of IER3IP1, does the expression profile of FCER1G indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TTI1 is perturbed and the expression of RHCE is measured. Determine whether RHCE shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, CARS is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb SAMM50 and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. 
Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PDIA6 is perturbed and YTHDF2 expression is observed. Does this perturbation lead to a significant difference in YTHDF2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SRP68 and then measure expression of PPCS. Does this perturbation cause a significant change in PPCS expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb DAD1 and monitor SNHG12 expression. Decide whether this perturbation leads to a significant alteration in SNHG12 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SRP68 is perturbed and FCER1G expression is quantified. Does this perturbation result in a significant change in FCER1G expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. 
For experiments carried out in K562 cells, we perturb DAD1 and examine the expression of CLCA1. Does perturbing DAD1 lead to a significant change in CLCA1 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of TELO2, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which ATP5B is perturbed and FCER1G expression is observed. Does this perturbation lead to a significant difference in FCER1G expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which SRPRB is perturbed and PPCS expression is observed. Does this perturbation lead to a significant difference in PPCS expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, P4HB is perturbed and RHCE expression is quantified. Does this perturbation result in a significant change in RHCE expression compared with control cells? 
Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to MRGBP and then measure expression of RPS27. Does this perturbation cause a significant change in RPS27 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb IER3IP1 and monitor FCER1G expression. Decide whether this perturbation leads to a significant alteration in FCER1G expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SPCS2 is perturbed and the expression of SH3BGRL3 is measured. Determine whether SH3BGRL3 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, ARHGAP22 is perturbed and the expression of SH3BGRL3 is measured. Does this perturbation cause a significant change in SH3BGRL3 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. 
In K562 cells, IARS2 is perturbed and the expression of PHGDH is measured. Determine whether PHGDH shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of HARS, does the expression profile of PHGDH indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TIMM23 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SPCS3, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SRPRB is perturbed and the expression of PPCS is measured. Determine whether PPCS shows a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SLMO2 and then measure expression of PHGDH. Does this perturbation cause a significant change in PHGDH expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ZNF326 is associated with a significant change in RGS16 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, SLMO2 is perturbed and the expression of FCGR2A is measured. Does this perturbation cause a significant change in FCGR2A expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TIMM23 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. 
For experiments performed in K562 cells, BHLHE40 is perturbed and SESN2 expression is measured. Determine whether SESN2 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SEC63 is perturbed and CD52 expression is measured. Determine whether CD52 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to DERL2 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SAMM50, does the expression profile of FCGR2A indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, EIF2B2 is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of COPZ1, does the expression profile of SH3BGRL3 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, GBF1 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which BHLHE40 is perturbed and SESN2 expression is observed. Does this perturbation lead to a significant difference in SESN2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, KCTD16 is perturbed and the expression of SNHG12 is measured. Does this perturbation cause a significant change in SNHG12 expression? 
Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': '\"\"\"\\nUse the context from `{question}` to determine the best answer. If the answer is uncertain or conflicting, clearly indicate this. Ensure that the answer is validated against reliable sources for accuracy before proceeding. Provide your final answer in a clear format, without extra commentary or reasoning.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. 
Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 18:06:59.991\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.037 | Total tokens: 14952423 | Current cost: $0.001 | Current tokens: 5166\u001b[0m\n", "\u001b[32m2026-01-01 18:07:00.909\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.037 | Total tokens: 14952527 | Current cost: $0.000 | Current tokens: 104\u001b[0m\n", "\u001b[32m2026-01-01 18:07:02.541\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.038 | Total tokens: 14954656 | Current cost: $0.000 | Current tokens: 2129\u001b[0m\n", "{'name': 'validate_answer2087', 'description': 'Task to validate_answer2087. Takes answer as input. Produces validated_answer as output.', 'inputs': [{'name': 'answer', 'type': 'str', 'description': 'Input parameter answer for validate_answer2087', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_answer2087', 'required': True}], 'prompt': '\"\"\"\\nThink step by step to answer the question based on the context provided in {question}. First, validate the context to ensure accuracy and relevance. If the answer is uncertain or ambiguous, clearly state that uncertainty. After validation, generate a straightforward answer that directly addresses {question}, considering any relevant nuances. 
Format your output in XML, using to explain your reasoning and for the final response. Ensure that the answer is concise and reflects any significant changes accurately, avoiding oversimplification.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 18:07:05.887\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.039 | Total tokens: 14959896 | Current cost: $0.001 | Current tokens: 5240\u001b[0m\n", "\u001b[32m2026-01-01 18:07:07.292\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.039 | Total tokens: 14959991 | Current cost: $0.000 | Current tokens: 95\u001b[0m\n", "\u001b[32m2026-01-01 18:07:09.260\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.039 | Total tokens: 14963726 | Current cost: $0.001 | Current tokens: 3735\u001b[0m\n", "{'name': 'contextualize_answer4593', 'description': 'Task to contextualize_answer4593. Takes validated_answer, question as input. 
Produces final_answer as output.', 'inputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Input parameter validated_answer for contextualize_answer4593', 'required': False}, {'name': 'question', 'type': 'str', 'description': 'Input parameter question for contextualize_answer4593', 'required': False}], 'outputs': [{'name': 'final_answer', 'type': 'str', 'description': 'Output parameter final_answer from contextualize_answer4593', 'required': True}], 'prompt': '```xml\\n\"\"\"\\nTo answer the question accurately, first generate an answer based on the provided {question}. Validate the generated answer by ensuring it aligns with the context of the {question} and meets the criteria for a \"validated answer,\" which includes correctness and relevance. If the generated answer does not significantly change the context or is incorrect, indicate this in your validation. In your thought process, clarify any assumptions made and how the context relates to the answer. Ensure that the final answer is presented in the format \\'Final Answer: Yes\\' or \\'Final Answer: No\\'. Provide your reasoning in the field and the final validated answer in the field.\\n\"\"\"\\n```', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 18:07:12.192\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.040 | Total tokens: 14968959 | Current cost: $0.001 | Current tokens: 5233\u001b[0m\n", "\u001b[32m2026-01-01 18:07:13.620\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.040 | Total tokens: 14969060 | Current cost: $0.000 | Current tokens: 101\u001b[0m\n", "\u001b[32m2026-01-01 18:07:16.165\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.041 | Total tokens: 14973177 | Current cost: $0.001 | Current tokens: 4117\u001b[0m\n", "\u001b[32m2026-01-01 18:07:16.166\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1049\u001b[0m - \u001b[1mEvaluate the workflow at step 28 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:00<00:40, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:01<00:44, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 
[00:03<00:49, 1.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:04<00:52, 1.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:05<00:46, 1.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:05<00:41, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:06<00:40, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:07<00:40, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:08<00:40, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:09<00:37, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:11<00:41, 1.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:14<01:06, 1.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:15<00:58, 1.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:16<00:51, 1.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:17<00:43, 1.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:18<00:38, 1.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:19<00:39, 1.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:20<00:35, 1.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:23<00:49, 1.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:24<00:39, 1.33s/it]" 
] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:25<00:36, 1.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:26<00:32, 1.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:27<00:30, 1.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:27<00:26, 1.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:28<00:23, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:29<00:23, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:30<00:22, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:31<00:21, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:34<00:29, 1.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:35<00:27, 1.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:36<00:23, 1.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:37<00:21, 1.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:38<00:18, 1.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:38<00:15, 1.00it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:39<00:14, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:40<00:12, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
74%|███████▍ | 37/50 [00:41<00:13, 1.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:42<00:11, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:44<00:13, 1.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:45<00:10, 1.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:45<00:08, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:47<00:09, 1.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:48<00:07, 1.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:49<00:06, 1.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:50<00:05, 1.05s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:51<00:04, 1.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:54<00:04, 1.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:55<00:02, 1.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:56<00:01, 1.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:57<00:00, 1.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-01 18:08:13.495\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1052\u001b[0m - \u001b[1mStep 28 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.94}\u001b[0m\n", "randomly update dataset\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 18:08:16.646\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.047 | Total tokens: 15011873 | Current cost: $0.001 | Current tokens: 8018\u001b[0m\n", "- The workflow 
lacks a validation step to ensure that the generated answer aligns with the specific format required ('Final Answer: Yes' or 'Final Answer: No'), leading to potential inconsistencies in output.\n", "- There are multiple instances of incorrect computation results, indicating a failure in the validation process or the underlying logic used to derive answers, as seen in questions regarding MRGBP, PPWD1, SOCS1, and others.\n", "- The workflow does not account for the possibility of ambiguous or contradictory questions, which could lead to misinterpretation and incorrect answers.\n", "- The execution history shows that the same question structure is used repeatedly without adapting to the specific context of each perturbation, which may lead to oversights in unique biological nuances.\n", "\u001b[32m2026-01-01 18:08:19.464\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.047 | Total tokens: 15012561 | Current cost: $0.000 | Current tokens: 688\u001b[0m\n", "```python\n", "steps = [\n", "{'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", "{'name': 'contextualize_answer4593', 'args': ['answer', 'question'], 'outputs': ['final_answer']},\n", "{'name': 'validate_answer2087', 'args': ['final_answer'], 'outputs': ['validated_answer']}\n", "]\n", "```\n", "Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb PSMD4 and monitor PSMD4 expression. Decide whether this perturbation leads to a significant alteration in PSMD4 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PPWD1 is perturbed and CLCA1 expression is observed. 
Does this perturbation lead to a significant difference in CLCA1 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to UFL1 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of GMPPB, does the expression profile of RHCE indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IER3IP1 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, NEDD8 is perturbed and FCGR2A expression is quantified. Does this perturbation result in a significant change in FCGR2A expression compared with control cells? 
Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which ATP5B is perturbed and FCER1G expression is observed. Does this perturbation lead to a significant difference in FCER1G expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SCYL1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, PDIA6 is perturbed and SNHG12 expression is measured. Determine whether SNHG12 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IDH3A is perturbed and SNHG12 expression is observed. Does this perturbation lead to a significant difference in SNHG12 expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PDIA6 is perturbed and YTHDF2 expression is observed. Does this perturbation lead to a significant difference in YTHDF2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, CARS is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of ARHGAP22, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to CARS and then measure expression of FCGR2A. Does this perturbation cause a significant change in FCGR2A expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. 
In K562 cells, we introduce a perturbation to MRGBP and then measure expression of RPS27. Does this perturbation cause a significant change in RPS27 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SPCS2 is perturbed and the expression of SH3BGRL3 is measured. Determine whether SH3BGRL3 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of COPB1, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb HYOU1 and examine the expression of SNHG12. Does perturbing HYOU1 lead to a significant change in SNHG12 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which SRPRB is perturbed and PPCS expression is observed. Does this perturbation lead to a significant difference in PPCS expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, EIF2B2 is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, P4HB is perturbed and RHCE expression is measured. Determine whether RHCE exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SEC63 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DAD1 and examine the expression of CLCA1. Does perturbing DAD1 lead to a significant change in CLCA1 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. 
In K562 cells, HSD17B12 is perturbed and FCGR2A expression is quantified. Does this perturbation result in a significant change in FCGR2A expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SLC35B1 and examine the expression of RHCE. Does perturbing SLC35B1 lead to a significant change in RHCE expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of COPZ1, does the expression profile of SH3BGRL3 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, CHERP is perturbed and NPL expression is quantified. Does this perturbation result in a significant change in NPL expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of PPWD1 is associated with a significant change in CD52 expression compared with unperturbed controls. 
Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of HARS, does the expression profile of PHGDH indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to TIMM23 and then measure expression of SH3BGRL3. Does this perturbation cause a significant change in SH3BGRL3 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ASCC3 is associated with a significant change in SH3BGRL3 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SAMM50 and examine the expression of FCGR2A. Does perturbing SAMM50 lead to a significant change in FCGR2A expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, IARS2 is perturbed and the expression of PHGDH is measured. 
Determine whether PHGDH shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ZNF326 is associated with a significant change in RGS16 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, KCTD16 is perturbed and the expression of SNHG12 is measured. Does this perturbation cause a significant change in SNHG12 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SLMO2 and then measure expression of PHGDH. Does this perturbation cause a significant change in PHGDH expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb IER3IP1 and monitor FCER1G expression. Decide whether this perturbation leads to a significant alteration in FCER1G expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. 
For experiments carried out in K562 cells, we perturb TTI1 and examine the expression of RHCE. Does perturbing TTI1 lead to a significant change in RHCE expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of IER3IP1, does the expression profile of FCER1G indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, IER3IP1 is perturbed and CLCA1 expression is measured. Determine whether CLCA1 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of MARS, does the expression profile of FAM129A indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of SEL1L is associated with a significant change in TXNIP expression compared with unperturbed controls. 
Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SRP68 is perturbed and FCER1G expression is quantified. Does this perturbation result in a significant change in FCER1G expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, ZNF326 is perturbed and ZNF326 expression is measured. Determine whether ZNF326 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, GBF1 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SOCS1 is perturbed and RPS27 expression is measured. Determine whether RPS27 exhibits a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DDIT3 and examine the expression of RGS16. Does perturbing DDIT3 lead to a significant change in RGS16 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb MRGBP and examine the expression of RPS27. Does perturbing MRGBP lead to a significant change in RPS27 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SYVN1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SYVN1 is perturbed and YTHDF2 expression is quantified. Does this perturbation result in a significant change in YTHDF2 expression compared with control cells? 
Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': '\"\"\"\\nUse the context from `{question}` to determine the best answer. If the answer is uncertain or conflicting, clearly indicate this. Ensure that the answer is validated against reliable sources for accuracy before proceeding. Provide your final answer in a clear format, without extra commentary or reasoning.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. 
Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 18:08:22.815\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.048 | Total tokens: 15017748 | Current cost: $0.001 | Current tokens: 5187\u001b[0m\n", "\u001b[32m2026-01-01 18:08:23.984\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.048 | Total tokens: 15017847 | Current cost: $0.000 | Current tokens: 99\u001b[0m\n", "\u001b[32m2026-01-01 18:08:25.226\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.048 | Total tokens: 15020062 | Current cost: $0.000 | Current tokens: 2215\u001b[0m\n", "{'name': 'contextualize_answer4593', 'description': 'Task to contextualize_answer4593. Takes validated_answer, question as input. Produces final_answer as output.', 'inputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Input parameter validated_answer for contextualize_answer4593', 'required': False}, {'name': 'question', 'type': 'str', 'description': 'Input parameter question for contextualize_answer4593', 'required': False}], 'outputs': [{'name': 'final_answer', 'type': 'str', 'description': 'Output parameter final_answer from contextualize_answer4593', 'required': True}], 'prompt': '```xml\\n\"\"\"\\nTo answer the question accurately, first generate an answer based on the provided {question}. 
Validate the generated answer by ensuring it aligns with the context of the {question} and meets the criteria for a \"validated answer,\" which includes correctness and relevance. If the generated answer does not significantly change the context or is incorrect, indicate this in your validation. In your thought process, clarify any assumptions made and how the context relates to the answer. Ensure that the final answer is presented in the format \\'Final Answer: Yes\\' or \\'Final Answer: No\\'. Provide your reasoning in the field and the final validated answer in the field.\\n\"\"\"\\n```', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 18:08:27.886\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.049 | Total tokens: 15025280 | Current cost: $0.001 | Current tokens: 5218\u001b[0m\n", "\u001b[32m2026-01-01 18:08:29.344\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.049 | Total tokens: 15025375 | Current cost: $0.000 | Current tokens: 95\u001b[0m\n", "\u001b[32m2026-01-01 18:08:33.967\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.050 | Total tokens: 15029613 | Current cost: $0.001 | Current tokens: 4238\u001b[0m\n", "{'name': 
'validate_answer2087', 'description': 'Task to validate_answer2087. Takes answer as input. Produces validated_answer as output.', 'inputs': [{'name': 'answer', 'type': 'str', 'description': 'Input parameter answer for validate_answer2087', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_answer2087', 'required': True}], 'prompt': '\"\"\"\\nThink step by step to answer the question based on the context provided in {question}. First, validate the context to ensure accuracy and relevance. If the answer is uncertain or ambiguous, clearly state that uncertainty. After validation, generate a straightforward answer that directly addresses {question}, considering any relevant nuances. Format your output in XML, using to explain your reasoning and for the final response. Ensure that the answer is concise and reflects any significant changes accurately, avoiding oversimplification.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 18:08:36.841\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.051 | Total tokens: 15034857 | Current cost: $0.001 | Current tokens: 5244\u001b[0m\n", "\u001b[32m2026-01-01 18:08:37.637\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.051 | Total tokens: 15034952 | Current cost: $0.000 | Current tokens: 95\u001b[0m\n", "\u001b[32m2026-01-01 18:08:40.591\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.052 | Total tokens: 15038817 | Current cost: $0.001 | Current tokens: 3865\u001b[0m\n", "\u001b[32m2026-01-01 18:08:40.592\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1049\u001b[0m - \u001b[1mEvaluate the workflow at step 29 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:00<00:41, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:01<00:44, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 
[00:02<00:42, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:03<00:40, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:04<00:41, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:05<00:43, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:06<00:41, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:07<00:38, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:08<00:35, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:09<00:40, 1.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:10<00:42, 1.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:11<00:38, 1.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:12<00:36, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:13<00:33, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:14<00:35, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:15<00:31, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:16<00:28, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:16<00:27, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:17<00:26, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:18<00:24, 1.21it/s]" 
] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:19<00:26, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:20<00:23, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:20<00:21, 1.24it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:21<00:21, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:22<00:22, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:25<00:31, 1.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:26<00:30, 1.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:27<00:27, 1.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:28<00:23, 1.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:29<00:22, 1.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:32<00:31, 1.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:33<00:24, 1.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:34<00:21, 1.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:35<00:18, 1.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:36<00:17, 1.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:37<00:15, 1.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
74%|███████▍ | 37/50 [00:38<00:14, 1.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:39<00:11, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:39<00:10, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:40<00:09, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:41<00:07, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:42<00:06, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:43<00:06, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:44<00:05, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:45<00:05, 1.07s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:47<00:05, 1.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:48<00:03, 1.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:49<00:02, 1.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:50<00:01, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:51<00:00, 1.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-01 18:09:31.815\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1052\u001b[0m - \u001b[1mStep 29 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.96}\u001b[0m\n", "randomly update dataset\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 18:09:34.764\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.058 | Total tokens: 15077516 | Current cost: $0.001 | Current tokens: 8017\u001b[0m\n", "- The workflow 
lacks a validation step to ensure that the generated answer aligns with the specific format required ('Final Answer: Yes' or 'Final Answer: No'), which could lead to incorrect outputs.\n", "- There are multiple instances of incorrect computation results leading to discrepancies between predictions and solutions, indicating potential flaws in the answer generation or validation process.\n", "- The workflow does not account for the possibility of ambiguous or contradictory questions, which could result in misleading answers.\n", "- The control flow does not include error handling for cases where the validation step fails, leading to unhandled cases in the output.\n", "- The assumption that all questions can be answered with a binary response may not hold true for all contexts, indicating a lack of flexibility in the workflow design.\n", "\u001b[32m2026-01-01 18:09:36.253\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.058 | Total tokens: 15078203 | Current cost: $0.000 | Current tokens: 687\u001b[0m\n", "```python\n", "steps = [\n", "{'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", "{'name': 'validate_answer2087', 'args': ['answer'], 'outputs': ['validated_answer']},\n", "{'name': 'contextualize_answer4593', 'args': ['validated_answer', 'question'], 'outputs': ['final_answer']}\n", "]\n", "```\n", "Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SAMM50 and examine the expression of FCGR2A. Does perturbing SAMM50 lead to a significant change in FCGR2A expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. 
In K562 cells, GBF1 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ZNF326 is associated with a significant change in RGS16 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of TELO2, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of HARS, does the expression profile of PHGDH indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IDH3A is perturbed and SNHG12 expression is observed. Does this perturbation lead to a significant difference in SNHG12 expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of SEL1L is associated with a significant change in TXNIP expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SPCS2 is perturbed and the expression of SH3BGRL3 is measured. Determine whether SH3BGRL3 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SRP68 and then measure expression of PPCS. Does this perturbation cause a significant change in PPCS expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TIMM23 is perturbed and the expression of CLCA1 is measured. Determine whether CLCA1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb DAD1 and monitor SNHG12 expression. 
Decide whether this perturbation leads to a significant alteration in SNHG12 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, KCTD16 is perturbed and the expression of SNHG12 is measured. Does this perturbation cause a significant change in SNHG12 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to DERL2 and then measure expression of FCER1G. Does this perturbation cause a significant change in FCER1G expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb PSMD4 and monitor PSMD4 expression. Decide whether this perturbation leads to a significant alteration in PSMD4 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DDIT3 is perturbed and RPS27 expression is quantified. Does this perturbation result in a significant change in RPS27 expression compared with control cells? 
Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb HYOU1 and examine the expression of SNHG12. Does perturbing HYOU1 lead to a significant change in SNHG12 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, CARS is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SEC63 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb FARSB and monitor FCGR2A expression. Decide whether this perturbation leads to a significant alteration in FCGR2A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. 
In K562 cells, ARHGAP22 is perturbed and the expression of SH3BGRL3 is measured. Does this perturbation cause a significant change in SH3BGRL3 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which IER3IP1 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, STT3A is perturbed and the expression of FCER1G is measured. Determine whether FCER1G shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, ZNF326 is perturbed and ZNF326 expression is measured. Determine whether ZNF326 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, TMEM167A is perturbed and the expression of CD52 is measured. Does this perturbation cause a significant change in CD52 expression? 
Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of IER3IP1, does the expression profile of FCER1G indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TIMM23 is perturbed and SH3BGRL3 expression is observed. Does this perturbation lead to a significant difference in SH3BGRL3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb IER3IP1 and monitor FCER1G expression. Decide whether this perturbation leads to a significant alteration in FCER1G expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of COPB1, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. 
Consider data from K562 cells in which PDIA6 is perturbed and YTHDF2 expression is observed. Does this perturbation lead to a significant difference in YTHDF2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, P4HB is perturbed and RHCE expression is measured. Determine whether RHCE exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TTI1 is perturbed and the expression of CD52 is measured. Determine whether CD52 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to CARS and then measure expression of FCGR2A. Does this perturbation cause a significant change in FCGR2A expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, IER3IP1 is perturbed and CLCA1 expression is measured. Determine whether CLCA1 exhibits a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SRP68 is perturbed and FCER1G expression is quantified. Does this perturbation result in a significant change in FCER1G expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, P4HB is perturbed and RHCE expression is quantified. Does this perturbation result in a significant change in RHCE expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, HSD17B12 is perturbed and FCGR2A expression is quantified. Does this perturbation result in a significant change in FCGR2A expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SYVN1 and then measure expression of CD52. Does this perturbation cause a significant change in CD52 expression? 
Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of MARS, does the expression profile of FAM129A indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, UFM1 is perturbed and the expression of CLCA1 is measured. Does this perturbation cause a significant change in CLCA1 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which AMIGO3 is perturbed and TXNIP expression is observed. Does this perturbation lead to a significant difference in TXNIP expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb MRGBP and examine the expression of RPS27. Does perturbing MRGBP lead to a significant change in RPS27 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in regulatory genomics. 
Consider data from K562 cells in which BHLHE40 is perturbed and SESN2 expression is observed. Does this perturbation lead to a significant difference in SESN2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, SLMO2 is perturbed and the expression of FCGR2A is measured. Does this perturbation cause a significant change in FCGR2A expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, IARS2 is perturbed and the expression of PHGDH is measured. Determine whether PHGDH shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, EIF2B2 is perturbed and FCGR2A expression is measured. Determine whether FCGR2A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which SRPRB is perturbed and PPCS expression is observed. Does this perturbation lead to a significant difference in PPCS expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ASCC3 is associated with a significant change in SH3BGRL3 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SPCS3, does the expression profile of CD52 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SRPRB is perturbed and the expression of PPCS is measured. Determine whether PPCS shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, BHLHE40 is perturbed and SESN2 expression is measured. Determine whether SESN2 exhibits a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': '\"\"\"\\nUse the context from `{question}` to determine the best answer. If the answer is uncertain or conflicting, clearly indicate this. Ensure that the answer is validated against reliable sources for accuracy before proceeding. Provide your final answer in a clear format, without extra commentary or reasoning.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. 
Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 18:09:39.365\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.059 | Total tokens: 15083389 | Current cost: $0.001 | Current tokens: 5186\u001b[0m\n", "\u001b[32m2026-01-01 18:09:40.267\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.059 | Total tokens: 15083491 | Current cost: $0.000 | Current tokens: 102\u001b[0m\n", "\u001b[32m2026-01-01 18:09:41.425\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.059 | Total tokens: 15085768 | Current cost: $0.000 | Current tokens: 2277\u001b[0m\n", "{'name': 'validate_answer2087', 'description': 'Task to validate_answer2087. Takes answer as input. Produces validated_answer as output.', 'inputs': [{'name': 'answer', 'type': 'str', 'description': 'Input parameter answer for validate_answer2087', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_answer2087', 'required': True}], 'prompt': '\"\"\"\\nThink step by step to answer the question based on the context provided in {question}. First, validate the context to ensure accuracy and relevance. If the answer is uncertain or ambiguous, clearly state that uncertainty. After validation, generate a straightforward answer that directly addresses {question}, considering any relevant nuances. 
Format your output in XML, using to explain your reasoning and for the final response. Ensure that the answer is concise and reflects any significant changes accurately, avoiding oversimplification.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 18:09:43.954\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.060 | Total tokens: 15090996 | Current cost: $0.001 | Current tokens: 5228\u001b[0m\n", "\u001b[32m2026-01-01 18:09:44.789\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.060 | Total tokens: 15091093 | Current cost: $0.000 | Current tokens: 97\u001b[0m\n", "\u001b[32m2026-01-01 18:09:46.819\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.061 | Total tokens: 15095063 | Current cost: $0.001 | Current tokens: 3970\u001b[0m\n", "{'name': 'contextualize_answer4593', 'description': 'Task to contextualize_answer4593. Takes validated_answer, question as input. 
Produces final_answer as output.', 'inputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Input parameter validated_answer for contextualize_answer4593', 'required': False}, {'name': 'question', 'type': 'str', 'description': 'Input parameter question for contextualize_answer4593', 'required': False}], 'outputs': [{'name': 'final_answer', 'type': 'str', 'description': 'Output parameter final_answer from contextualize_answer4593', 'required': True}], 'prompt': '```xml\\n\"\"\"\\nTo answer the question accurately, first generate an answer based on the provided {question}. Validate the generated answer by ensuring it aligns with the context of the {question} and meets the criteria for a \"validated answer,\" which includes correctness and relevance. If the generated answer does not significantly change the context or is incorrect, indicate this in your validation. In your thought process, clarify any assumptions made and how the context relates to the answer. Ensure that the final answer is presented in the format \\'Final Answer: Yes\\' or \\'Final Answer: No\\'. Provide your reasoning in the field and the final validated answer in the field.\\n\"\"\"\\n```', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}\n", "\u001b[32m2026-01-01 18:09:49.409\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.061 | Total tokens: 15100284 | Current cost: $0.001 | Current tokens: 5221\u001b[0m\n", "\u001b[32m2026-01-01 18:09:50.896\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.062 | Total tokens: 15100405 | Current cost: $0.000 | Current tokens: 121\u001b[0m\n", "\u001b[32m2026-01-01 18:09:52.946\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $3.062 | Total tokens: 15104807 | Current cost: $0.001 | Current tokens: 4402\u001b[0m\n", "\u001b[32m2026-01-01 18:09:52.946\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1049\u001b[0m - \u001b[1mEvaluate the workflow at step 30 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:00<00:45, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:03<01:22, 1.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 
[00:04<01:11, 1.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:05<00:58, 1.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:06<00:47, 1.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:06<00:43, 1.00it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:07<00:43, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:09<00:44, 1.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:09<00:38, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:10<00:35, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:11<00:34, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:12<00:31, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:13<00:35, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:14<00:33, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:15<00:32, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:16<00:30, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:16<00:29, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:17<00:28, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:18<00:26, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:19<00:25, 1.17it/s]" 
] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:21<00:32, 1.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:22<00:33, 1.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:23<00:28, 1.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:24<00:26, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:25<00:25, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:26<00:26, 1.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:27<00:23, 1.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:28<00:22, 1.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:29<00:20, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:30<00:19, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:31<00:18, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:32<00:17, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:33<00:16, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:33<00:13, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:34<00:12, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:35<00:12, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
74%|███████▍ | 37/50 [00:36<00:10, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:37<00:10, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:38<00:10, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:39<00:09, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:39<00:07, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:40<00:07, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:41<00:06, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:42<00:05, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:43<00:04, 1.15it/s]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:45<00:05, 1.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:48<00:05, 1.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:49<00:03, 1.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:50<00:01, 1.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:51<00:00, 1.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-01 18:10:44.622\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1052\u001b[0m - \u001b[1mStep 30 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.92}\u001b[0m\n", "randomly update dataset\n", "\u001b[32m2026-01-01 18:10:44.622\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1059\u001b[0m - \u001b[1mReach the maximum number of steps 30. 
Stop the optimization.\u001b[0m\n", "\u001b[32m2026-01-01 18:10:44.622\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1062\u001b[0m - \u001b[1mRestore the best graph from the snapshot ...\u001b[0m\n", "\u001b[32m2026-01-01 18:10:44.623\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36mrestore_best_graph\u001b[0m:\u001b[36m1211\u001b[0m - \u001b[1mRestore the best graph from snapshot with metrics {'f1': 0.0, 'em': 0.0, 'acc': 0.98} ...\u001b[0m\n", "\u001b[32m2026-01-01 18:10:44.624\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36mrestore_best_graph\u001b[0m:\u001b[36m1211\u001b[0m - \u001b[1mRestore the best graph from snapshot with metrics {'f1': 0.0, 'em': 0.0, 'acc': 0.98} ...\u001b[0m\n", "\u001b[32m2026-01-01 18:10:44.624\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36msave_module\u001b[0m:\u001b[36m1201\u001b[0m - \u001b[1mSaving SequentialWorkFlowGraph to ./debug/save_30_noreason.json\u001b[0m\n", "\u001b[32m2026-01-01 18:10:44.631\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.agents.customize_agent\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m92\u001b[0m - \u001b[33m\u001b[1mBoth `prompt` and `prompt_template` are provided in `CustomizeAgent`. `prompt_template` will be used.\u001b[0m\n", "\u001b[32m2026-01-01 18:10:44.651\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.agents.customize_agent\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m92\u001b[0m - \u001b[33m\u001b[1mBoth `prompt` and `prompt_template` are provided in `CustomizeAgent`. 
`prompt_template` will be used.\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n", "Evaluating workflow: 0%| | 1/2500 [00:04<3:02:06, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 0%| | 2/2500 [00:08<2:50:43, 4.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 0%| | 3/2500 [00:12<2:43:41, 3.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 0%| | 4/2500 [00:15<2:32:37, 3.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 0%| | 5/2500 [00:18<2:28:15, 3.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 0%| | 6/2500 [00:22<2:26:47, 3.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 0%| | 7/2500 [00:26<2:35:29, 3.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 0%| | 8/2500 [00:30<2:35:40, 3.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating 
workflow: 0%| | 9/2500 [00:33<2:30:22, 3.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 0%| | 10/2500 [00:37<2:40:11, 3.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 0%| | 11/2500 [00:41<2:38:09, 3.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 0%| | 12/2500 [00:44<2:29:30, 3.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 13/2500 [00:48<2:29:50, 3.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 14/2500 [00:52<2:40:28, 3.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 15/2500 [00:57<2:56:52, 4.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 16/2500 [01:03<3:17:09, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 17/2500 [01:07<3:03:33, 4.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 
0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 18/2500 [01:11<2:56:05, 4.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 19/2500 [01:16<3:09:33, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 20/2500 [01:21<3:06:13, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 21/2500 [01:25<3:05:24, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 22/2500 [01:31<3:29:02, 5.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 23/2500 [01:38<3:45:30, 5.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 24/2500 [01:42<3:34:21, 5.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 25/2500 [01:46<3:11:58, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 
26/2500 [01:50<3:07:05, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 27/2500 [01:53<2:53:02, 4.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 28/2500 [01:57<2:51:13, 4.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 29/2500 [02:02<2:58:55, 4.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 30/2500 [02:08<3:15:11, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 31/2500 [02:13<3:22:44, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%|▏ | 32/2500 [02:17<3:13:07, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%|▏ | 33/2500 [02:22<3:09:51, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%|▏ | 34/2500 [02:28<3:31:10, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 
0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%|▏ | 35/2500 [02:31<3:07:24, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%|▏ | 36/2500 [02:36<3:10:26, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%|▏ | 37/2500 [02:41<3:10:51, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 38/2500 [02:45<3:07:19, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 39/2500 [02:50<3:11:05, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 40/2500 [02:54<2:59:08, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 41/2500 [03:00<3:15:35, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 42/2500 [03:06<3:40:42, 5.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 
43/2500 [03:11<3:32:46, 5.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 44/2500 [03:16<3:30:06, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 45/2500 [03:23<3:46:06, 5.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 46/2500 [03:27<3:29:28, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 47/2500 [03:30<3:07:43, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 48/2500 [03:34<2:57:56, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 49/2500 [03:38<2:52:30, 4.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 50/2500 [03:42<2:50:23, 4.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 51/2500 [03:48<3:13:18, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 
'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 52/2500 [03:52<3:08:09, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 53/2500 [03:57<3:08:11, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 54/2500 [04:01<2:59:22, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 55/2500 [04:05<2:53:11, 4.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 56/2500 [04:09<2:49:24, 4.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 57/2500 [04:15<3:15:37, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 58/2500 [04:19<3:00:23, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 59/2500 [04:22<2:51:23, 4.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
2%|▏ | 60/2500 [04:27<3:03:09, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 61/2500 [04:32<2:58:36, 4.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 62/2500 [04:37<3:09:35, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 63/2500 [04:41<3:06:05, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 64/2500 [04:44<2:50:01, 4.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 65/2500 [04:48<2:36:57, 3.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 66/2500 [04:52<2:49:20, 4.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 67/2500 [04:56<2:41:27, 3.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 68/2500 [05:00<2:40:37, 3.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 
0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 69/2500 [05:04<2:45:18, 4.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 70/2500 [05:11<3:18:53, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 71/2500 [05:16<3:19:32, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 72/2500 [05:23<3:45:20, 5.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 73/2500 [05:27<3:22:50, 5.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 74/2500 [05:32<3:22:35, 5.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 75/2500 [05:36<3:05:37, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 76/2500 [05:40<3:01:31, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating 
workflow: 3%|▎ | 77/2500 [05:45<3:09:57, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 78/2500 [05:51<3:24:17, 5.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 79/2500 [05:55<3:07:49, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 80/2500 [05:59<3:10:16, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 81/2500 [06:04<3:08:51, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 82/2500 [06:07<2:53:43, 4.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 83/2500 [06:14<3:15:33, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 84/2500 [06:20<3:37:09, 5.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 85/2500 [06:24<3:20:25, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 86/2500 [06:29<3:14:31, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 87/2500 [06:33<3:03:05, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▎ | 88/2500 [06:38<3:15:04, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▎ | 89/2500 [06:42<3:03:42, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▎ | 90/2500 [06:47<3:08:30, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▎ | 91/2500 [06:51<3:04:19, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▎ | 92/2500 [06:57<3:14:57, 4.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▎ | 93/2500 [07:00<2:52:33, 4.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", 
"Evaluating workflow: 4%|▍ | 94/2500 [07:04<2:50:01, 4.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 95/2500 [07:09<2:54:32, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 96/2500 [07:14<3:03:29, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 97/2500 [07:19<3:10:16, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 98/2500 [07:23<3:00:28, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 99/2500 [07:28<3:09:45, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 100/2500 [07:33<3:06:43, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 101/2500 [07:38<3:10:32, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 102/2500 [07:41<2:48:48, 4.22s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 103/2500 [07:45<2:55:03, 4.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 104/2500 [07:50<3:02:43, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 105/2500 [07:54<2:50:04, 4.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 106/2500 [07:59<2:56:57, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 107/2500 [08:05<3:22:37, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 108/2500 [08:09<3:07:00, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 109/2500 [08:14<3:06:06, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 110/2500 [08:20<3:28:19, 5.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 111/2500 [08:24<3:15:43, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 112/2500 [08:30<3:21:35, 5.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▍ | 113/2500 [08:35<3:26:12, 5.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▍ | 114/2500 [08:42<3:46:49, 5.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▍ | 115/2500 [08:47<3:35:50, 5.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▍ | 116/2500 [08:54<3:48:43, 5.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▍ | 117/2500 [08:57<3:20:51, 5.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▍ | 118/2500 [09:02<3:14:38, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▍ | 119/2500 [09:06<3:09:02, 4.76s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▍ | 120/2500 [09:10<3:01:35, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▍ | 121/2500 [09:14<2:49:37, 4.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▍ | 122/2500 [09:20<3:17:51, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▍ | 123/2500 [09:25<3:10:43, 4.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▍ | 124/2500 [09:28<2:54:03, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 125/2500 [09:34<3:11:50, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 126/2500 [09:38<3:05:42, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 127/2500 [09:44<3:17:03, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": 
"stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 128/2500 [09:49<3:14:52, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 129/2500 [09:53<3:03:32, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 130/2500 [09:57<2:53:01, 4.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 131/2500 [10:00<2:46:33, 4.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 132/2500 [10:04<2:36:22, 3.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 133/2500 [10:09<2:51:08, 4.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 134/2500 [10:13<2:47:01, 4.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 135/2500 [10:16<2:36:23, 3.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 136/2500 [10:20<2:36:39, 
3.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 137/2500 [10:23<2:23:17, 3.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 138/2500 [10:26<2:17:14, 3.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 139/2500 [10:36<3:28:37, 5.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 140/2500 [10:41<3:25:05, 5.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 141/2500 [10:47<3:31:21, 5.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 142/2500 [10:51<3:14:39, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 143/2500 [10:55<3:11:53, 4.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 144/2500 [11:00<3:05:14, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 145/2500 [11:04<3:06:36, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 146/2500 [11:10<3:13:53, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 147/2500 [11:14<3:03:37, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 148/2500 [11:18<2:50:43, 4.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 149/2500 [11:21<2:43:02, 4.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 150/2500 [11:26<2:54:21, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 151/2500 [11:31<2:57:40, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 152/2500 [11:34<2:38:47, 4.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 
153/2500 [11:39<2:52:16, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 154/2500 [11:44<2:58:20, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 155/2500 [11:48<2:47:25, 4.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 156/2500 [11:53<3:02:12, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▋ | 157/2500 [11:59<3:14:55, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▋ | 158/2500 [12:03<2:57:54, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▋ | 159/2500 [12:06<2:41:29, 4.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▋ | 160/2500 [12:10<2:41:41, 4.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▋ | 161/2500 [12:14<2:41:24, 4.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 
1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▋ | 162/2500 [12:18<2:34:33, 3.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 163/2500 [12:22<2:33:04, 3.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 164/2500 [12:26<2:34:45, 3.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 165/2500 [12:32<3:01:20, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 166/2500 [12:37<3:05:05, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 167/2500 [12:45<3:48:55, 5.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 168/2500 [12:49<3:24:08, 5.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 169/2500 [12:53<3:07:28, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating 
workflow: 7%|▋ | 170/2500 [12:57<3:02:00, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 171/2500 [13:03<3:12:29, 4.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 172/2500 [13:07<3:00:09, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 173/2500 [13:11<2:53:03, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 174/2500 [13:16<2:55:46, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 175/2500 [13:19<2:42:14, 4.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 176/2500 [13:22<2:31:18, 3.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 177/2500 [13:27<2:44:39, 4.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 178/2500 [13:31<2:37:52, 4.08s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 179/2500 [13:35<2:35:28, 4.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 180/2500 [13:38<2:31:06, 3.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 181/2500 [13:44<2:46:47, 4.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 182/2500 [13:47<2:35:47, 4.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 183/2500 [13:52<2:41:29, 4.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 184/2500 [13:56<2:40:54, 4.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 185/2500 [14:01<2:52:27, 4.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 186/2500 [14:04<2:33:41, 3.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 7%|▋ | 187/2500 [14:09<2:43:36, 4.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 188/2500 [14:15<3:07:48, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 189/2500 [14:21<3:20:11, 5.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 190/2500 [14:25<3:07:29, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 191/2500 [14:29<2:56:49, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 192/2500 [14:34<3:06:36, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 193/2500 [14:38<2:56:20, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 194/2500 [14:42<2:45:30, 4.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 195/2500 [14:46<2:38:01, 4.11s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 196/2500 [14:50<2:43:37, 4.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 197/2500 [14:55<2:44:23, 4.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 198/2500 [15:01<3:04:01, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 199/2500 [15:04<2:44:39, 4.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 200/2500 [15:09<2:53:06, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 201/2500 [15:13<2:50:53, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 202/2500 [15:17<2:42:21, 4.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 203/2500 [15:21<2:38:19, 4.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 204/2500 [15:24<2:26:51, 3.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 205/2500 [15:29<2:40:39, 4.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 206/2500 [15:33<2:34:00, 4.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 207/2500 [15:38<2:52:53, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 208/2500 [15:43<2:50:12, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 209/2500 [15:48<2:59:04, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 210/2500 [15:52<2:54:31, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 211/2500 [15:57<2:54:08, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 212/2500 [16:00<2:44:37, 4.32s/it]" ] 
}, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▊ | 213/2500 [16:05<2:45:34, 4.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▊ | 214/2500 [16:09<2:41:17, 4.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▊ | 215/2500 [16:13<2:35:38, 4.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▊ | 216/2500 [16:20<3:15:01, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▊ | 217/2500 [16:28<3:47:48, 5.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▊ | 218/2500 [16:32<3:24:14, 5.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 219/2500 [16:39<3:44:41, 5.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 220/2500 [16:46<3:56:32, 6.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 221/2500 [16:50<3:26:52, 5.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 222/2500 [16:55<3:26:26, 5.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 223/2500 [16:59<3:05:04, 4.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 224/2500 [17:02<2:45:33, 4.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 225/2500 [17:07<2:50:21, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 226/2500 [17:12<2:57:43, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 227/2500 [17:17<3:03:30, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 228/2500 [17:21<2:58:10, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 229/2500 
[17:25<2:44:42, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 230/2500 [17:31<3:02:33, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 231/2500 [17:35<2:48:43, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 232/2500 [17:38<2:41:18, 4.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 233/2500 [17:44<2:59:46, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 234/2500 [17:48<2:46:48, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 235/2500 [17:51<2:30:56, 4.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 236/2500 [17:54<2:23:52, 3.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 237/2500 [17:58<2:25:23, 3.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 
'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|▉ | 238/2500 [18:03<2:33:58, 4.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|▉ | 239/2500 [18:07<2:31:58, 4.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|▉ | 240/2500 [18:13<3:02:25, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|▉ | 241/2500 [18:17<2:50:46, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|▉ | 242/2500 [18:22<2:52:35, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|▉ | 243/2500 [18:28<3:03:17, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|▉ | 244/2500 [18:32<2:53:01, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|▉ | 245/2500 [18:36<2:51:39, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", 
"Evaluating workflow: 10%|▉ | 246/2500 [18:41<2:54:52, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|▉ | 247/2500 [18:46<3:05:34, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|▉ | 248/2500 [18:54<3:32:03, 5.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|▉ | 249/2500 [18:58<3:12:40, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 250/2500 [19:02<3:05:27, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 251/2500 [19:07<3:02:55, 4.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 252/2500 [19:11<2:49:36, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 253/2500 [19:15<2:43:58, 4.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 254/2500 [19:20<2:52:59, 4.62s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 255/2500 [19:23<2:40:14, 4.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 256/2500 [19:28<2:43:17, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 257/2500 [19:34<2:57:46, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 258/2500 [19:37<2:38:28, 4.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 259/2500 [19:41<2:38:34, 4.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 260/2500 [19:47<3:03:22, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 261/2500 [19:53<3:06:11, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 262/2500 [19:57<2:56:40, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": 
"stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 263/2500 [20:02<3:00:41, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 264/2500 [20:05<2:46:53, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 265/2500 [20:11<2:55:59, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 266/2500 [20:16<2:59:01, 4.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 267/2500 [20:20<2:51:29, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 268/2500 [20:23<2:36:50, 4.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 269/2500 [20:27<2:31:02, 4.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 270/2500 [20:31<2:33:17, 4.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 271/2500 
[20:35<2:32:21, 4.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 272/2500 [20:40<2:43:34, 4.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 273/2500 [20:45<2:46:54, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 274/2500 [20:49<2:39:18, 4.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 275/2500 [20:55<3:03:37, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 276/2500 [20:59<2:52:06, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 277/2500 [21:04<2:50:03, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 278/2500 [21:07<2:39:09, 4.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 279/2500 [21:12<2:41:33, 4.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics 
{'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 280/2500 [21:16<2:40:42, 4.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 281/2500 [21:21<2:44:17, 4.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█▏ | 282/2500 [21:25<2:46:14, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█▏ | 283/2500 [21:29<2:38:26, 4.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█▏ | 284/2500 [21:34<2:41:46, 4.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█▏ | 285/2500 [21:40<3:07:18, 5.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█▏ | 286/2500 [21:44<2:55:05, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█▏ | 287/2500 [21:48<2:36:52, 4.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 12%|█▏ | 288/2500 [21:52<2:43:12, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 289/2500 [21:56<2:37:20, 4.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 290/2500 [22:00<2:33:51, 4.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 291/2500 [22:03<2:19:27, 3.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 292/2500 [22:08<2:27:47, 4.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 293/2500 [22:14<2:48:08, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 294/2500 [22:19<2:53:20, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 295/2500 [22:22<2:43:54, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 296/2500 [22:26<2:33:41, 4.18s/it]" ] }, { 
"name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 297/2500 [22:29<2:21:23, 3.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 298/2500 [22:34<2:32:29, 4.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 299/2500 [22:38<2:28:07, 4.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 300/2500 [22:41<2:23:34, 3.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 301/2500 [22:46<2:35:59, 4.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 302/2500 [22:51<2:35:06, 4.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 303/2500 [22:54<2:31:23, 4.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 304/2500 [22:59<2:38:00, 4.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 305/2500 [23:03<2:29:44, 4.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 306/2500 [23:06<2:23:33, 3.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 307/2500 [23:10<2:16:58, 3.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 308/2500 [23:14<2:26:30, 4.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 309/2500 [23:19<2:29:46, 4.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 310/2500 [23:23<2:33:16, 4.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 311/2500 [23:27<2:31:34, 4.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 312/2500 [23:32<2:38:31, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating 
workflow: 13%|█▎ | 313/2500 [23:36<2:34:55, 4.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 314/2500 [23:44<3:13:05, 5.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 315/2500 [23:48<3:05:43, 5.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 316/2500 [23:53<2:59:23, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 317/2500 [23:59<3:15:28, 5.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 318/2500 [24:08<3:53:55, 6.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 319/2500 [24:18<4:26:44, 7.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 320/2500 [24:21<3:44:21, 6.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 321/2500 [24:26<3:30:56, 5.81s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 322/2500 [24:30<3:14:10, 5.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 323/2500 [24:34<2:56:56, 4.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 324/2500 [24:38<2:46:11, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 325/2500 [24:42<2:35:42, 4.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 326/2500 [24:46<2:33:59, 4.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 327/2500 [24:49<2:20:34, 3.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 328/2500 [24:52<2:15:10, 3.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 329/2500 [24:58<2:39:56, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 330/2500 [25:03<2:48:49, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 331/2500 [25:08<2:50:12, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 332/2500 [25:13<2:54:14, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 333/2500 [25:17<2:41:39, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 334/2500 [25:20<2:26:48, 4.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 335/2500 [25:23<2:15:11, 3.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 336/2500 [25:27<2:18:46, 3.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 337/2500 [25:32<2:28:17, 4.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▎ | 
338/2500 [25:36<2:23:17, 3.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▎ | 339/2500 [25:41<2:39:51, 4.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▎ | 340/2500 [25:47<2:52:47, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▎ | 341/2500 [25:51<2:52:17, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▎ | 342/2500 [25:56<2:48:46, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▎ | 343/2500 [26:00<2:37:04, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 344/2500 [26:04<2:41:43, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 345/2500 [26:08<2:32:32, 4.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 346/2500 [26:12<2:25:21, 4.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 347/2500 [26:15<2:16:41, 3.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 348/2500 [26:18<2:11:57, 3.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 349/2500 [26:22<2:16:44, 3.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 350/2500 [26:26<2:11:35, 3.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 351/2500 [26:32<2:37:51, 4.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 352/2500 [26:39<3:10:55, 5.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 353/2500 [26:43<2:57:53, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 354/2500 [26:48<2:48:23, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 355/2500 [26:51<2:39:12, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 356/2500 [26:55<2:35:07, 4.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 357/2500 [27:01<2:46:18, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 358/2500 [27:05<2:37:01, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 359/2500 [27:08<2:25:14, 4.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 360/2500 [27:14<2:45:35, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 361/2500 [27:18<2:35:45, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 362/2500 [27:21<2:27:03, 4.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▍ | 363/2500 [27:25<2:24:11, 4.05s/it]" 
] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▍ | 364/2500 [27:29<2:23:36, 4.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▍ | 365/2500 [27:34<2:30:02, 4.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▍ | 366/2500 [27:37<2:21:31, 3.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▍ | 367/2500 [27:42<2:33:20, 4.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▍ | 368/2500 [27:46<2:23:28, 4.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▍ | 369/2500 [27:50<2:31:21, 4.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▍ | 370/2500 [27:54<2:26:29, 4.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▍ | 371/2500 [28:00<2:41:07, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 
0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▍ | 372/2500 [28:04<2:43:08, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▍ | 373/2500 [28:08<2:27:24, 4.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▍ | 374/2500 [28:14<2:48:34, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 375/2500 [28:22<3:20:33, 5.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 376/2500 [28:25<2:55:00, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 377/2500 [28:29<2:45:54, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 378/2500 [28:33<2:43:42, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 379/2500 [28:37<2:31:55, 4.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", 
"Evaluating workflow: 15%|█▌ | 380/2500 [28:41<2:33:23, 4.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 381/2500 [28:45<2:23:46, 4.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 382/2500 [28:49<2:20:40, 3.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 383/2500 [28:56<2:52:07, 4.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 384/2500 [28:59<2:33:42, 4.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 385/2500 [29:03<2:29:09, 4.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 386/2500 [29:06<2:19:34, 3.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 387/2500 [29:10<2:24:58, 4.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 388/2500 [29:14<2:22:53, 4.06s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 389/2500 [29:19<2:28:03, 4.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 390/2500 [29:28<3:16:30, 5.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 391/2500 [29:31<2:56:35, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 392/2500 [29:36<2:53:59, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 393/2500 [29:42<2:58:21, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 394/2500 [29:47<2:57:05, 5.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 395/2500 [29:51<2:47:03, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 396/2500 [29:55<2:42:08, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 397/2500 [29:59<2:36:13, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 398/2500 [30:02<2:18:05, 3.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 399/2500 [30:05<2:13:36, 3.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 400/2500 [30:10<2:22:39, 4.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 401/2500 [30:14<2:27:08, 4.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 402/2500 [30:18<2:23:46, 4.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 403/2500 [30:22<2:18:11, 3.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 404/2500 [30:28<2:40:29, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 
405/2500 [30:34<2:54:55, 5.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 406/2500 [30:38<2:41:40, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▋ | 407/2500 [30:43<2:45:34, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▋ | 408/2500 [30:47<2:36:17, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▋ | 409/2500 [30:51<2:31:53, 4.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▋ | 410/2500 [30:54<2:24:23, 4.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▋ | 411/2500 [30:59<2:28:17, 4.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▋ | 412/2500 [31:02<2:18:59, 3.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 413/2500 [31:07<2:30:15, 4.32s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 414/2500 [31:11<2:27:21, 4.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 415/2500 [31:15<2:25:41, 4.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 416/2500 [31:20<2:33:16, 4.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 417/2500 [31:25<2:36:21, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 418/2500 [31:30<2:43:48, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 419/2500 [31:36<2:51:12, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 420/2500 [31:42<3:02:31, 5.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 421/2500 [31:47<2:57:31, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 422/2500 [31:50<2:38:16, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 423/2500 [31:53<2:25:09, 4.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 424/2500 [31:58<2:30:38, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 425/2500 [32:03<2:35:31, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 426/2500 [32:08<2:44:22, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 427/2500 [32:13<2:41:11, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 428/2500 [32:17<2:35:08, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 429/2500 [32:21<2:30:27, 4.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 430/2500 
[32:26<2:36:17, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 431/2500 [32:30<2:33:32, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 432/2500 [32:36<2:46:32, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 433/2500 [32:40<2:43:38, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 434/2500 [32:46<2:52:02, 5.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 435/2500 [32:49<2:35:26, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 436/2500 [32:54<2:42:58, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 437/2500 [33:00<2:48:47, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 438/2500 [33:04<2:42:33, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics 
{'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 439/2500 [33:08<2:32:20, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 440/2500 [33:13<2:38:05, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 441/2500 [33:18<2:39:37, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 442/2500 [33:22<2:37:25, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 443/2500 [33:27<2:37:55, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 444/2500 [33:32<2:44:05, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 445/2500 [33:37<2:48:04, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 446/2500 [33:42<2:49:36, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 18%|█▊ | 447/2500 [33:47<2:45:09, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 448/2500 [33:50<2:26:42, 4.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 449/2500 [33:53<2:19:00, 4.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 450/2500 [33:58<2:24:30, 4.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 451/2500 [34:02<2:19:10, 4.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 452/2500 [34:05<2:15:03, 3.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 453/2500 [34:10<2:21:38, 4.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 454/2500 [34:15<2:35:26, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 455/2500 [34:22<2:59:01, 5.25s/it]" ] }, { 
"name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 456/2500 [34:26<2:42:21, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 457/2500 [34:30<2:35:28, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 458/2500 [34:36<2:47:36, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 459/2500 [34:40<2:45:56, 4.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 460/2500 [34:44<2:35:27, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 461/2500 [34:48<2:26:15, 4.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 462/2500 [34:53<2:29:57, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▊ | 463/2500 [34:56<2:23:34, 4.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 
'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▊ | 464/2500 [35:02<2:38:07, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▊ | 465/2500 [35:07<2:36:06, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▊ | 466/2500 [35:10<2:26:21, 4.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▊ | 467/2500 [35:15<2:26:52, 4.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▊ | 468/2500 [35:19<2:25:16, 4.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 469/2500 [35:23<2:25:42, 4.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 470/2500 [35:27<2:18:00, 4.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 471/2500 [35:30<2:09:04, 3.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating 
workflow: 19%|█▉ | 472/2500 [35:35<2:18:04, 4.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 473/2500 [35:39<2:20:44, 4.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 474/2500 [35:43<2:19:04, 4.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 475/2500 [35:48<2:25:30, 4.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 476/2500 [35:52<2:20:27, 4.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 477/2500 [35:56<2:23:15, 4.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 478/2500 [35:59<2:11:11, 3.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 479/2500 [36:02<2:01:14, 3.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 480/2500 [36:07<2:10:45, 3.88s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 481/2500 [36:10<2:06:28, 3.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 482/2500 [36:14<2:10:37, 3.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 483/2500 [36:18<2:07:56, 3.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 484/2500 [36:23<2:17:24, 4.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 485/2500 [36:28<2:33:51, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 486/2500 [36:31<2:19:25, 4.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 487/2500 [36:36<2:24:50, 4.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 488/2500 [36:40<2:20:04, 4.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, 
{ "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 489/2500 [36:45<2:27:50, 4.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 490/2500 [36:50<2:31:55, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 491/2500 [36:56<2:43:48, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 492/2500 [37:00<2:37:09, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 493/2500 [37:05<2:39:20, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 494/2500 [37:08<2:27:01, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 495/2500 [37:12<2:24:22, 4.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 496/2500 [37:17<2:26:05, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 
497/2500 [37:21<2:21:11, 4.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 498/2500 [37:24<2:15:10, 4.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 499/2500 [37:28<2:11:27, 3.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 500/2500 [37:32<2:11:37, 3.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 501/2500 [37:36<2:08:21, 3.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 502/2500 [37:41<2:18:27, 4.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 503/2500 [37:45<2:20:30, 4.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 504/2500 [37:49<2:21:51, 4.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 505/2500 [37:55<2:36:51, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": 
[ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 506/2500 [38:00<2:37:29, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 507/2500 [38:04<2:27:50, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 508/2500 [38:08<2:22:26, 4.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 509/2500 [38:12<2:27:01, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 510/2500 [38:16<2:16:43, 4.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 511/2500 [38:20<2:17:10, 4.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 512/2500 [38:24<2:22:07, 4.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 513/2500 [38:28<2:14:36, 4.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 514/2500 [38:32<2:15:04, 4.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 515/2500 [38:37<2:22:42, 4.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 516/2500 [38:42<2:32:49, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 517/2500 [38:47<2:37:33, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 518/2500 [38:51<2:28:51, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 519/2500 [38:57<2:36:45, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 520/2500 [39:01<2:28:06, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 521/2500 [39:05<2:26:51, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 522/2500 
[39:10<2:33:07, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 523/2500 [39:13<2:20:48, 4.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 524/2500 [39:19<2:31:54, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 525/2500 [39:22<2:18:30, 4.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 526/2500 [39:27<2:21:33, 4.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 527/2500 [39:31<2:25:11, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 528/2500 [39:37<2:35:41, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 529/2500 [39:41<2:34:49, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 530/2500 [39:46<2:30:37, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 531/2500 [39:50<2:25:08, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██▏ | 532/2500 [39:55<2:29:37, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██▏ | 533/2500 [39:59<2:30:27, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██▏ | 534/2500 [40:04<2:29:12, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██▏ | 535/2500 [40:08<2:27:57, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██▏ | 536/2500 [40:12<2:19:10, 4.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██▏ | 537/2500 [40:15<2:12:38, 4.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 538/2500 [40:21<2:31:18, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 539/2500 [40:26<2:30:52, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 540/2500 [40:31<2:30:34, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 541/2500 [40:34<2:15:08, 4.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 542/2500 [40:38<2:13:27, 4.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 543/2500 [40:42<2:18:20, 4.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 544/2500 [40:46<2:12:57, 4.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 545/2500 [40:52<2:35:03, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 546/2500 [40:57<2:33:17, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 547/2500 [41:01<2:29:17, 
4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 548/2500 [41:07<2:41:31, 4.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 549/2500 [41:12<2:38:09, 4.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 550/2500 [41:17<2:43:01, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 551/2500 [41:22<2:44:11, 5.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 552/2500 [41:26<2:35:39, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 553/2500 [41:32<2:44:42, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 554/2500 [41:36<2:33:00, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 555/2500 [41:40<2:27:01, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics 
{'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 556/2500 [41:45<2:26:30, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 557/2500 [41:48<2:15:25, 4.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 558/2500 [41:52<2:18:44, 4.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 559/2500 [41:56<2:09:35, 4.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 560/2500 [41:59<2:05:27, 3.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 561/2500 [42:02<1:56:21, 3.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 562/2500 [42:07<2:01:49, 3.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 563/2500 [42:11<2:10:31, 4.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 564/2500 [42:16<2:21:12, 4.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 565/2500 [42:20<2:14:28, 4.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 566/2500 [42:24<2:10:25, 4.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 567/2500 [42:29<2:19:33, 4.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 568/2500 [42:34<2:23:32, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 569/2500 [42:38<2:19:42, 4.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 570/2500 [42:41<2:15:13, 4.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 571/2500 [42:45<2:08:23, 3.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 572/2500 
[42:49<2:10:44, 4.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 573/2500 [42:53<2:11:05, 4.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 574/2500 [42:58<2:13:23, 4.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 575/2500 [43:01<2:08:38, 4.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 576/2500 [43:05<2:04:33, 3.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 577/2500 [43:09<2:02:11, 3.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 578/2500 [43:14<2:14:26, 4.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 579/2500 [43:18<2:12:09, 4.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 580/2500 [43:21<2:05:09, 3.91s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 581/2500 [43:25<2:03:59, 3.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 582/2500 [43:31<2:21:18, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 583/2500 [43:36<2:26:44, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 584/2500 [43:40<2:24:12, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 585/2500 [43:46<2:40:28, 5.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 586/2500 [43:51<2:36:34, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 587/2500 [43:55<2:27:22, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▎ | 588/2500 [43:58<2:13:50, 4.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▎ | 589/2500 [44:04<2:29:32, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▎ | 590/2500 [44:09<2:31:25, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▎ | 591/2500 [44:12<2:15:40, 4.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▎ | 592/2500 [44:18<2:38:13, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▎ | 593/2500 [44:22<2:24:54, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 594/2500 [44:28<2:43:00, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 595/2500 [44:33<2:33:01, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 596/2500 [44:36<2:24:29, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 
597/2500 [44:40<2:17:59, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 598/2500 [44:44<2:10:31, 4.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 599/2500 [44:48<2:05:32, 3.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 600/2500 [44:52<2:14:59, 4.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 601/2500 [44:56<2:06:02, 3.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 602/2500 [44:59<1:58:55, 3.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 603/2500 [45:04<2:08:22, 4.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 604/2500 [45:09<2:15:58, 4.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 605/2500 [45:12<2:08:38, 4.07s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 606/2500 [45:17<2:12:47, 4.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 607/2500 [45:20<2:01:03, 3.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 608/2500 [45:24<2:01:01, 3.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 609/2500 [45:29<2:20:38, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 610/2500 [45:34<2:16:31, 4.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 611/2500 [45:38<2:21:03, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 612/2500 [45:43<2:22:29, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ | 613/2500 [45:47<2:14:47, 4.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": 
"stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ | 614/2500 [45:51<2:12:50, 4.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ | 615/2500 [45:55<2:12:21, 4.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ | 616/2500 [45:58<2:05:47, 4.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ | 617/2500 [46:04<2:19:35, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ | 618/2500 [46:09<2:21:05, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ | 619/2500 [46:13<2:17:24, 4.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ | 620/2500 [46:17<2:14:17, 4.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ | 621/2500 [46:22<2:21:31, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ 
| 622/2500 [46:28<2:35:28, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ | 623/2500 [46:32<2:24:16, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ | 624/2500 [46:36<2:24:08, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 625/2500 [46:40<2:14:16, 4.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 626/2500 [46:44<2:18:01, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 627/2500 [46:49<2:21:17, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 628/2500 [46:53<2:14:11, 4.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 629/2500 [46:56<2:03:31, 3.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 630/2500 [47:00<2:00:26, 3.86s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 631/2500 [47:04<2:07:44, 4.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 632/2500 [47:08<2:06:48, 4.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 633/2500 [47:12<2:02:57, 3.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 634/2500 [47:17<2:08:14, 4.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 635/2500 [47:22<2:21:47, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 636/2500 [47:26<2:09:59, 4.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 637/2500 [47:31<2:23:59, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 638/2500 [47:35<2:16:47, 4.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": 
"stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 639/2500 [47:39<2:10:45, 4.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 640/2500 [47:42<2:02:39, 3.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 641/2500 [47:48<2:21:46, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 642/2500 [47:52<2:11:56, 4.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 643/2500 [47:55<2:05:41, 4.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 644/2500 [48:00<2:12:30, 4.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 645/2500 [48:05<2:16:19, 4.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 646/2500 [48:10<2:26:36, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 
647/2500 [48:14<2:12:05, 4.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 648/2500 [48:18<2:10:00, 4.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 649/2500 [48:22<2:06:16, 4.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 650/2500 [48:26<2:06:39, 4.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 651/2500 [48:30<2:13:17, 4.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 652/2500 [48:34<2:08:06, 4.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 653/2500 [48:39<2:17:01, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 654/2500 [48:44<2:16:43, 4.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 655/2500 [48:49<2:21:51, 4.61s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 656/2500 [48:52<2:08:00, 4.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▋ | 657/2500 [48:55<1:58:30, 3.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▋ | 658/2500 [48:59<2:02:29, 3.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▋ | 659/2500 [49:05<2:14:26, 4.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▋ | 660/2500 [49:10<2:22:12, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▋ | 661/2500 [49:13<2:10:56, 4.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▋ | 662/2500 [49:19<2:19:48, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 663/2500 [49:23<2:20:38, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": 
"stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 664/2500 [49:27<2:14:17, 4.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 665/2500 [49:32<2:15:09, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 666/2500 [49:36<2:15:35, 4.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 667/2500 [49:43<2:36:03, 5.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 668/2500 [49:47<2:28:23, 4.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 669/2500 [49:53<2:41:14, 5.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 670/2500 [49:59<2:42:53, 5.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 671/2500 [50:05<2:48:38, 5.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 
672/2500 [50:11<2:51:31, 5.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 673/2500 [50:15<2:35:51, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 674/2500 [50:20<2:36:21, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 675/2500 [50:24<2:27:19, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 676/2500 [50:29<2:31:46, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 677/2500 [50:38<3:08:40, 6.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 678/2500 [50:42<2:48:15, 5.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 679/2500 [50:46<2:32:50, 5.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 680/2500 [50:53<2:53:09, 5.71s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 681/2500 [50:59<2:54:35, 5.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 682/2500 [51:06<3:03:32, 6.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 683/2500 [51:12<2:58:40, 5.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 684/2500 [51:18<3:07:16, 6.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 685/2500 [51:24<3:02:54, 6.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 686/2500 [51:30<3:00:57, 5.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 687/2500 [51:35<2:51:18, 5.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 688/2500 [51:42<3:02:59, 6.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 689/2500 [51:50<3:19:21, 6.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 690/2500 [51:54<2:57:44, 5.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 691/2500 [52:02<3:17:33, 6.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 692/2500 [52:10<3:27:28, 6.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 693/2500 [52:16<3:17:40, 6.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 694/2500 [52:24<3:30:13, 6.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 695/2500 [52:29<3:19:46, 6.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 696/2500 [52:35<3:07:12, 6.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 697/2500 
[52:39<2:47:19, 5.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 698/2500 [52:45<2:58:17, 5.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 699/2500 [52:50<2:46:52, 5.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 700/2500 [52:54<2:34:04, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 701/2500 [52:59<2:34:19, 5.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 702/2500 [53:07<2:53:34, 5.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 703/2500 [53:14<3:07:56, 6.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 704/2500 [53:20<3:00:37, 6.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 705/2500 [53:25<2:54:44, 5.84s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 706/2500 [53:30<2:45:03, 5.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 707/2500 [53:35<2:41:11, 5.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 708/2500 [53:39<2:33:03, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 709/2500 [53:45<2:40:10, 5.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 710/2500 [53:50<2:32:58, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 711/2500 [53:54<2:23:33, 4.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 712/2500 [53:59<2:21:21, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▊ | 713/2500 [54:02<2:09:20, 4.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▊ | 714/2500 [54:06<2:06:44, 4.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▊ | 715/2500 [54:11<2:17:01, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▊ | 716/2500 [54:16<2:13:53, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▊ | 717/2500 [54:22<2:30:46, 5.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▊ | 718/2500 [54:26<2:23:16, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 719/2500 [54:35<2:53:41, 5.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 720/2500 [54:40<2:46:28, 5.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 721/2500 [54:44<2:33:26, 5.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 722/2500 
[54:49<2:33:33, 5.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 723/2500 [54:53<2:25:49, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 724/2500 [54:57<2:13:56, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 725/2500 [55:02<2:18:33, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 726/2500 [55:07<2:24:43, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 727/2500 [55:12<2:24:52, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 728/2500 [55:18<2:34:11, 5.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 729/2500 [55:24<2:36:10, 5.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 730/2500 [55:29<2:33:03, 5.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 731/2500 [55:32<2:20:45, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 732/2500 [55:37<2:19:37, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 733/2500 [55:41<2:09:11, 4.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 734/2500 [55:45<2:08:02, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 735/2500 [55:49<2:05:35, 4.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 736/2500 [55:53<2:06:39, 4.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 737/2500 [55:59<2:18:03, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 738/2500 [56:04<2:19:59, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 739/2500 [56:09<2:19:15, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 740/2500 [56:14<2:22:22, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 741/2500 [56:19<2:22:17, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 742/2500 [56:25<2:32:14, 5.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 743/2500 [56:30<2:29:38, 5.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 744/2500 [56:36<2:45:32, 5.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 745/2500 [56:43<2:51:46, 5.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 746/2500 [56:49<2:53:40, 5.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 747/2500 [56:53<2:34:37, 
5.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 748/2500 [56:59<2:41:31, 5.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 749/2500 [57:03<2:30:43, 5.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 750/2500 [57:09<2:39:41, 5.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 751/2500 [57:15<2:38:20, 5.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 752/2500 [57:18<2:23:19, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 753/2500 [57:23<2:20:22, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 754/2500 [57:29<2:29:21, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 755/2500 [57:33<2:23:42, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics 
{'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 756/2500 [57:40<2:35:30, 5.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 757/2500 [57:45<2:33:24, 5.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 758/2500 [57:51<2:37:45, 5.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 759/2500 [57:59<3:04:27, 6.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 760/2500 [58:03<2:46:37, 5.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 761/2500 [58:11<3:02:46, 6.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 762/2500 [58:15<2:41:29, 5.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 763/2500 [58:20<2:37:08, 5.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 31%|███ | 764/2500 [58:24<2:28:31, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 765/2500 [58:29<2:27:30, 5.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 766/2500 [58:36<2:36:08, 5.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 767/2500 [58:40<2:24:21, 5.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 768/2500 [58:44<2:19:10, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 769/2500 [58:48<2:09:26, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 770/2500 [58:54<2:29:27, 5.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 771/2500 [59:00<2:36:02, 5.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 772/2500 [59:06<2:39:46, 5.55s/it]" ] 
}, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 773/2500 [59:11<2:31:55, 5.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 774/2500 [59:17<2:39:35, 5.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 775/2500 [59:23<2:40:38, 5.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 776/2500 [59:28<2:39:04, 5.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 777/2500 [59:33<2:30:53, 5.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 778/2500 [59:37<2:24:44, 5.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 779/2500 [59:42<2:23:12, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 780/2500 [59:46<2:10:59, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 
0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 781/2500 [59:51<2:18:35, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███▏ | 782/2500 [59:57<2:23:57, 5.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███▏ | 783/2500 [1:00:01<2:19:06, 4.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███▏ | 784/2500 [1:00:05<2:08:30, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███▏ | 785/2500 [1:00:11<2:20:55, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███▏ | 786/2500 [1:00:17<2:35:22, 5.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███▏ | 787/2500 [1:00:21<2:20:44, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 788/2500 [1:00:25<2:14:18, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 789/2500 [1:00:30<2:15:51, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 790/2500 [1:00:34<2:07:04, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 791/2500 [1:00:38<2:01:54, 4.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 792/2500 [1:00:41<1:52:06, 3.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 793/2500 [1:00:45<1:51:08, 3.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 794/2500 [1:00:49<1:54:05, 4.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 795/2500 [1:00:54<2:03:35, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 796/2500 [1:01:00<2:11:27, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 
797/2500 [1:01:05<2:17:49, 4.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 798/2500 [1:01:09<2:12:13, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 799/2500 [1:01:13<2:05:53, 4.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 800/2500 [1:01:19<2:19:32, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 801/2500 [1:01:23<2:11:37, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 802/2500 [1:01:28<2:15:25, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 803/2500 [1:01:34<2:20:14, 4.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 804/2500 [1:01:39<2:26:48, 5.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 805/2500 [1:01:45<2:27:13, 5.21s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 806/2500 [1:01:51<2:36:40, 5.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 807/2500 [1:01:55<2:26:36, 5.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 808/2500 [1:01:59<2:17:55, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 809/2500 [1:02:04<2:12:58, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 810/2500 [1:02:09<2:14:45, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 811/2500 [1:02:15<2:24:40, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 812/2500 [1:02:20<2:26:11, 5.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 813/2500 [1:02:24<2:17:03, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics 
{'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 814/2500 [1:02:28<2:07:50, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 815/2500 [1:02:31<1:58:47, 4.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 816/2500 [1:02:35<1:55:47, 4.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 817/2500 [1:02:43<2:29:08, 5.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 818/2500 [1:02:48<2:19:28, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 819/2500 [1:02:51<2:09:34, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 820/2500 [1:02:57<2:19:49, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 821/2500 [1:03:01<2:12:32, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": 
"stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 822/2500 [1:03:07<2:15:59, 4.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 823/2500 [1:03:12<2:17:41, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 824/2500 [1:03:16<2:12:31, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 825/2500 [1:03:20<2:09:36, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 826/2500 [1:03:26<2:20:20, 5.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 827/2500 [1:03:33<2:33:25, 5.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 828/2500 [1:03:37<2:23:44, 5.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 829/2500 [1:03:42<2:24:23, 5.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", 
"Evaluating workflow: 33%|███▎ | 830/2500 [1:03:47<2:20:59, 5.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 831/2500 [1:03:53<2:29:49, 5.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 832/2500 [1:03:57<2:15:24, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 833/2500 [1:04:01<2:07:22, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 834/2500 [1:04:05<2:01:37, 4.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 835/2500 [1:04:09<1:56:27, 4.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 836/2500 [1:04:12<1:53:30, 4.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 837/2500 [1:04:16<1:46:06, 3.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▎ | 838/2500 [1:04:22<2:09:30, 
4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▎ | 839/2500 [1:04:27<2:06:50, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▎ | 840/2500 [1:04:31<2:00:30, 4.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▎ | 841/2500 [1:04:36<2:08:09, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▎ | 842/2500 [1:04:41<2:09:04, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▎ | 843/2500 [1:04:46<2:11:38, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 844/2500 [1:04:50<2:09:47, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 845/2500 [1:04:55<2:07:51, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 846/2500 [1:05:01<2:23:45, 5.21s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 847/2500 [1:05:06<2:16:40, 4.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 848/2500 [1:05:11<2:18:34, 5.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 849/2500 [1:05:17<2:24:48, 5.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 850/2500 [1:05:23<2:31:03, 5.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 851/2500 [1:05:26<2:15:20, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 852/2500 [1:05:30<2:07:00, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 853/2500 [1:05:34<2:01:36, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 854/2500 [1:05:39<2:04:44, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 855/2500 [1:05:46<2:25:49, 5.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 856/2500 [1:05:53<2:38:48, 5.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 857/2500 [1:05:57<2:26:27, 5.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 858/2500 [1:06:04<2:34:55, 5.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 859/2500 [1:06:09<2:34:13, 5.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 860/2500 [1:06:13<2:17:30, 5.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 861/2500 [1:06:17<2:14:28, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 862/2500 [1:06:21<1:59:54, 4.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 35%|███▍ | 863/2500 [1:06:24<1:47:24, 3.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 864/2500 [1:06:30<2:05:03, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 865/2500 [1:06:33<1:58:28, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 866/2500 [1:06:37<1:52:36, 4.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 867/2500 [1:06:41<1:52:58, 4.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 868/2500 [1:06:46<1:57:39, 4.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 869/2500 [1:06:50<1:55:03, 4.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 870/2500 [1:06:53<1:47:12, 3.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 
871/2500 [1:06:58<1:55:56, 4.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 872/2500 [1:07:03<1:56:27, 4.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 873/2500 [1:07:07<1:54:24, 4.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 874/2500 [1:07:12<1:59:30, 4.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 875/2500 [1:07:16<1:55:53, 4.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 876/2500 [1:07:21<2:02:27, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 877/2500 [1:07:25<1:58:00, 4.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 878/2500 [1:07:31<2:12:20, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 879/2500 [1:07:37<2:19:31, 5.16s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 880/2500 [1:07:41<2:12:26, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 881/2500 [1:07:46<2:10:55, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 882/2500 [1:07:49<2:02:47, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 883/2500 [1:07:53<1:55:09, 4.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 884/2500 [1:07:59<2:07:14, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 885/2500 [1:08:02<1:55:53, 4.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 886/2500 [1:08:06<1:53:33, 4.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 887/2500 [1:08:10<1:54:10, 4.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics 
{'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 888/2500 [1:08:17<2:10:14, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 889/2500 [1:08:21<2:06:39, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 890/2500 [1:08:26<2:07:47, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 891/2500 [1:08:30<2:03:06, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 892/2500 [1:08:36<2:10:46, 4.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 893/2500 [1:08:39<1:59:53, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 894/2500 [1:08:44<2:05:24, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 895/2500 [1:08:51<2:16:49, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 896/2500 [1:08:54<2:03:17, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 897/2500 [1:08:58<2:01:11, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 898/2500 [1:09:04<2:08:12, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 899/2500 [1:09:07<1:58:54, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 900/2500 [1:09:13<2:05:40, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 901/2500 [1:09:16<1:57:55, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 902/2500 [1:09:21<1:58:46, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 903/2500 [1:09:27<2:14:35, 5.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating 
workflow: 36%|███▌ | 904/2500 [1:09:31<2:04:45, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 905/2500 [1:09:36<2:03:34, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 906/2500 [1:09:40<1:59:00, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▋ | 907/2500 [1:09:44<1:56:45, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▋ | 908/2500 [1:09:48<1:51:52, 4.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▋ | 909/2500 [1:09:51<1:45:05, 3.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▋ | 910/2500 [1:09:57<1:55:21, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▋ | 911/2500 [1:10:00<1:48:56, 4.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▋ | 912/2500 [1:10:05<1:52:22, 
4.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 913/2500 [1:10:08<1:46:40, 4.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 914/2500 [1:10:12<1:46:33, 4.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 915/2500 [1:10:18<2:00:30, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 916/2500 [1:10:25<2:20:22, 5.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 917/2500 [1:10:29<2:08:25, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 918/2500 [1:10:34<2:07:53, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 919/2500 [1:10:38<2:02:20, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 920/2500 [1:10:41<1:52:30, 4.27s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 921/2500 [1:10:45<1:47:36, 4.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 922/2500 [1:10:49<1:49:14, 4.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 923/2500 [1:10:54<1:50:28, 4.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 924/2500 [1:10:57<1:45:46, 4.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 925/2500 [1:11:01<1:41:25, 3.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 926/2500 [1:11:05<1:41:37, 3.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 927/2500 [1:11:09<1:48:29, 4.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 928/2500 [1:11:14<1:52:02, 4.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, 
{ "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 929/2500 [1:11:19<1:59:01, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 930/2500 [1:11:23<1:52:32, 4.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 931/2500 [1:11:27<1:47:17, 4.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 932/2500 [1:11:30<1:43:00, 3.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 933/2500 [1:11:34<1:39:43, 3.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 934/2500 [1:11:38<1:43:28, 3.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 935/2500 [1:11:42<1:41:00, 3.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 936/2500 [1:11:47<1:53:30, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 37%|███▋ | 937/2500 [1:11:54<2:10:40, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 938/2500 [1:11:59<2:14:37, 5.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 939/2500 [1:12:04<2:08:19, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 940/2500 [1:12:08<2:04:08, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 941/2500 [1:12:12<2:00:05, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 942/2500 [1:12:16<1:51:17, 4.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 943/2500 [1:12:19<1:41:49, 3.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 944/2500 [1:12:24<1:52:32, 4.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 945/2500 
[1:12:28<1:52:40, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 946/2500 [1:12:33<1:55:31, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 947/2500 [1:12:39<2:02:21, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 948/2500 [1:12:43<2:00:49, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 949/2500 [1:12:47<1:54:09, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 950/2500 [1:12:51<1:52:48, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 951/2500 [1:12:56<1:56:19, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 952/2500 [1:13:00<1:53:14, 4.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 953/2500 [1:13:05<1:57:04, 4.54s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 954/2500 [1:13:09<1:55:30, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 955/2500 [1:13:13<1:46:03, 4.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 956/2500 [1:13:16<1:43:22, 4.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 957/2500 [1:13:20<1:41:06, 3.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 958/2500 [1:13:25<1:48:15, 4.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 959/2500 [1:13:30<1:56:16, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 960/2500 [1:13:35<1:59:54, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 961/2500 [1:13:39<1:49:21, 4.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 
0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 962/2500 [1:13:45<2:06:44, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▊ | 963/2500 [1:13:48<1:54:14, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▊ | 964/2500 [1:13:54<2:05:49, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▊ | 965/2500 [1:13:59<2:00:08, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▊ | 966/2500 [1:14:03<1:55:11, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▊ | 967/2500 [1:14:09<2:05:40, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▊ | 968/2500 [1:14:15<2:15:39, 5.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 969/2500 [1:14:19<2:07:40, 5.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 970/2500 [1:14:25<2:12:11, 5.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 971/2500 [1:14:29<2:07:44, 5.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 972/2500 [1:14:34<2:05:35, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 973/2500 [1:14:38<1:59:13, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 974/2500 [1:14:42<1:52:13, 4.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 975/2500 [1:14:45<1:44:34, 4.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 976/2500 [1:14:49<1:41:02, 3.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 977/2500 [1:14:52<1:36:38, 3.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 
978/2500 [1:14:58<1:48:55, 4.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 979/2500 [1:15:03<1:52:09, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 980/2500 [1:15:07<1:49:41, 4.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 981/2500 [1:15:11<1:50:44, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 982/2500 [1:15:17<2:01:21, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 983/2500 [1:15:21<1:58:42, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 984/2500 [1:15:25<1:48:10, 4.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 985/2500 [1:15:31<2:04:16, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 986/2500 [1:15:36<2:03:45, 4.90s/it]" ] }, { 
"name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 987/2500 [1:15:40<1:59:25, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 988/2500 [1:15:44<1:50:36, 4.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 989/2500 [1:15:49<1:59:36, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 990/2500 [1:15:53<1:53:27, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 991/2500 [1:15:57<1:43:40, 4.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 992/2500 [1:16:03<2:00:00, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 993/2500 [1:16:08<1:59:01, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 994/2500 [1:16:12<1:58:40, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics 
{'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 995/2500 [1:16:16<1:48:41, 4.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 996/2500 [1:16:19<1:43:33, 4.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 997/2500 [1:16:24<1:45:28, 4.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 998/2500 [1:16:29<1:49:23, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 999/2500 [1:16:33<1:52:49, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1000/2500 [1:16:39<1:58:41, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1001/2500 [1:16:42<1:51:23, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1002/2500 [1:16:48<1:58:09, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": 
"stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1003/2500 [1:16:52<1:55:27, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1004/2500 [1:16:56<1:48:13, 4.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1005/2500 [1:17:02<1:58:24, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1006/2500 [1:17:07<1:59:45, 4.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1007/2500 [1:17:11<1:54:55, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1008/2500 [1:17:15<1:52:44, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1009/2500 [1:17:21<2:03:41, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1010/2500 [1:17:27<2:14:37, 5.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", 
"Evaluating workflow: 40%|████ | 1011/2500 [1:17:33<2:14:32, 5.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1012/2500 [1:17:37<2:02:35, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1013/2500 [1:17:42<2:04:33, 5.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1014/2500 [1:17:45<1:51:19, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1015/2500 [1:17:49<1:47:44, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1016/2500 [1:17:52<1:38:46, 3.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1017/2500 [1:17:57<1:45:07, 4.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1018/2500 [1:18:01<1:44:51, 4.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1019/2500 
[1:18:05<1:40:08, 4.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1020/2500 [1:18:09<1:36:28, 3.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1021/2500 [1:18:13<1:42:26, 4.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1022/2500 [1:18:17<1:35:13, 3.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1023/2500 [1:18:23<1:50:20, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1024/2500 [1:18:28<1:55:10, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1025/2500 [1:18:31<1:44:55, 4.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1026/2500 [1:18:36<1:50:32, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1027/2500 [1:18:40<1:50:23, 4.50s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1028/2500 [1:18:45<1:50:07, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1029/2500 [1:18:49<1:46:33, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1030/2500 [1:18:55<1:56:37, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1031/2500 [1:18:59<1:55:54, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████▏ | 1032/2500 [1:19:04<1:51:34, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████▏ | 1033/2500 [1:19:08<1:51:53, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████▏ | 1034/2500 [1:19:14<1:58:48, 4.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████▏ | 1035/2500 [1:19:18<1:58:17, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": 
[ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████▏ | 1036/2500 [1:19:22<1:49:22, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████▏ | 1037/2500 [1:19:27<1:48:40, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1038/2500 [1:19:30<1:43:49, 4.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1039/2500 [1:19:37<1:58:28, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1040/2500 [1:19:43<2:06:45, 5.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1041/2500 [1:19:47<2:00:52, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1042/2500 [1:19:51<1:51:26, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1043/2500 [1:19:55<1:51:04, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1044/2500 [1:20:01<1:58:52, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1045/2500 [1:20:06<2:00:39, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1046/2500 [1:20:10<1:49:34, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1047/2500 [1:20:14<1:51:38, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1048/2500 [1:20:19<1:49:59, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1049/2500 [1:20:24<1:51:51, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1050/2500 [1:20:28<1:47:34, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1051/2500 [1:20:33<1:57:05, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1052/2500 [1:20:38<1:59:02, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1053/2500 [1:20:47<2:21:19, 5.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1054/2500 [1:20:50<2:01:52, 5.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1055/2500 [1:20:55<2:05:18, 5.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1056/2500 [1:20:58<1:50:46, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1057/2500 [1:21:02<1:45:05, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1058/2500 [1:21:07<1:44:59, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1059/2500 [1:21:11<1:47:23, 4.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", 
"Evaluating workflow: 42%|████▏ | 1060/2500 [1:21:16<1:46:00, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1061/2500 [1:21:19<1:40:57, 4.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1062/2500 [1:21:25<1:49:01, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1063/2500 [1:21:29<1:50:16, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1064/2500 [1:21:35<1:56:05, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1065/2500 [1:21:38<1:45:52, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1066/2500 [1:21:42<1:40:32, 4.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1067/2500 [1:21:47<1:42:37, 4.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 
1068/2500 [1:21:51<1:40:45, 4.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1069/2500 [1:21:55<1:44:47, 4.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1070/2500 [1:22:01<1:56:27, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1071/2500 [1:22:07<1:59:14, 5.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1072/2500 [1:22:11<1:57:38, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1073/2500 [1:22:17<1:58:32, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1074/2500 [1:22:21<1:54:41, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1075/2500 [1:22:25<1:50:40, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1076/2500 [1:22:29<1:44:50, 4.42s/it]" ] }, { 
"name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1077/2500 [1:22:33<1:41:44, 4.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1078/2500 [1:22:37<1:40:14, 4.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1079/2500 [1:22:41<1:36:26, 4.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1080/2500 [1:22:45<1:39:54, 4.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1081/2500 [1:22:50<1:44:34, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1082/2500 [1:22:53<1:34:17, 3.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1083/2500 [1:22:59<1:42:37, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1084/2500 [1:23:03<1:40:08, 4.24s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1085/2500 [1:23:06<1:31:44, 3.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1086/2500 [1:23:11<1:39:25, 4.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1087/2500 [1:23:15<1:38:17, 4.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▎ | 1088/2500 [1:23:18<1:35:02, 4.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▎ | 1089/2500 [1:23:22<1:32:30, 3.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▎ | 1090/2500 [1:23:25<1:26:42, 3.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▎ | 1091/2500 [1:23:29<1:30:12, 3.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▎ | 1092/2500 [1:23:36<1:46:21, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 
'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▎ | 1093/2500 [1:23:42<1:58:09, 5.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1094/2500 [1:23:57<3:07:24, 8.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1095/2500 [1:24:05<3:11:15, 8.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1096/2500 [1:24:12<3:04:49, 7.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1097/2500 [1:24:16<2:33:15, 6.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1098/2500 [1:24:19<2:11:44, 5.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1099/2500 [1:24:23<1:56:48, 5.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1100/2500 [1:24:27<1:47:28, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1101/2500 [1:24:31<1:48:26, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1102/2500 [1:24:36<1:45:57, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1103/2500 [1:24:41<1:48:59, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1104/2500 [1:24:44<1:40:39, 4.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1105/2500 [1:24:49<1:41:30, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1106/2500 [1:24:53<1:39:40, 4.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1107/2500 [1:24:57<1:39:52, 4.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1108/2500 [1:25:01<1:34:27, 4.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 44%|████▍ | 1109/2500 [1:25:05<1:36:15, 4.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1110/2500 [1:25:09<1:34:55, 4.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1111/2500 [1:25:13<1:37:35, 4.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1112/2500 [1:25:17<1:36:40, 4.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1113/2500 [1:25:21<1:33:32, 4.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1114/2500 [1:25:27<1:43:53, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1115/2500 [1:25:31<1:44:01, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1116/2500 [1:25:35<1:41:48, 4.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 
1117/2500 [1:25:42<1:59:01, 5.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1118/2500 [1:25:46<1:48:12, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1119/2500 [1:25:53<2:02:25, 5.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1120/2500 [1:25:56<1:49:49, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1121/2500 [1:26:01<1:49:32, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1122/2500 [1:26:06<1:49:09, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1123/2500 [1:26:10<1:43:24, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1124/2500 [1:26:14<1:41:04, 4.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1125/2500 [1:26:18<1:36:44, 4.22s/it]" ] }, { 
"name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1126/2500 [1:26:22<1:34:14, 4.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1127/2500 [1:26:26<1:36:23, 4.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1128/2500 [1:26:30<1:36:59, 4.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1129/2500 [1:26:34<1:32:26, 4.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1130/2500 [1:26:38<1:30:05, 3.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1131/2500 [1:26:42<1:36:39, 4.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1132/2500 [1:26:46<1:34:36, 4.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1133/2500 [1:26:52<1:43:46, 4.55s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1134/2500 [1:26:58<1:53:27, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1135/2500 [1:27:02<1:48:37, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1136/2500 [1:27:07<1:47:53, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1137/2500 [1:27:11<1:41:00, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1138/2500 [1:27:14<1:35:19, 4.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1139/2500 [1:27:18<1:34:28, 4.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1140/2500 [1:27:23<1:36:39, 4.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1141/2500 [1:27:27<1:34:11, 4.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 
0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1142/2500 [1:27:31<1:36:24, 4.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1143/2500 [1:27:38<1:56:40, 5.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1144/2500 [1:27:45<2:02:49, 5.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1145/2500 [1:27:51<2:12:28, 5.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1146/2500 [1:27:56<2:04:56, 5.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1147/2500 [1:28:00<1:53:05, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1148/2500 [1:28:05<1:50:20, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1149/2500 [1:28:10<1:52:04, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": 
"stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1150/2500 [1:28:14<1:46:22, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1151/2500 [1:28:19<1:48:28, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1152/2500 [1:28:22<1:39:10, 4.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1153/2500 [1:28:29<1:50:17, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1154/2500 [1:28:33<1:49:38, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1155/2500 [1:28:38<1:49:46, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1156/2500 [1:28:43<1:49:41, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▋ | 1157/2500 [1:28:49<1:56:31, 5.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 46%|████▋ | 1158/2500 [1:28:55<1:59:02, 5.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▋ | 1159/2500 [1:28:59<1:54:47, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▋ | 1160/2500 [1:29:04<1:53:39, 5.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▋ | 1161/2500 [1:29:12<2:12:11, 5.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▋ | 1162/2500 [1:29:17<2:05:01, 5.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1163/2500 [1:29:21<1:54:18, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1164/2500 [1:29:25<1:47:02, 4.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1165/2500 [1:29:30<1:50:16, 4.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
47%|████▋ | 1166/2500 [1:29:36<1:54:32, 5.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1167/2500 [1:29:41<1:54:12, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1168/2500 [1:29:45<1:47:25, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1169/2500 [1:29:49<1:42:36, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1170/2500 [1:29:55<1:45:26, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1171/2500 [1:29:59<1:46:09, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1172/2500 [1:30:04<1:42:58, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1173/2500 [1:30:08<1:39:54, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1174/2500 [1:30:12<1:39:53, 
4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1175/2500 [1:30:18<1:43:18, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1176/2500 [1:30:21<1:38:37, 4.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1177/2500 [1:30:28<1:54:26, 5.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1178/2500 [1:30:35<2:01:17, 5.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1179/2500 [1:30:38<1:49:04, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1180/2500 [1:30:43<1:45:20, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1181/2500 [1:30:46<1:37:21, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1182/2500 [1:30:51<1:37:28, 4.44s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1183/2500 [1:30:55<1:37:16, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1184/2500 [1:31:01<1:48:32, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1185/2500 [1:31:06<1:48:03, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1186/2500 [1:31:10<1:38:05, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1187/2500 [1:31:15<1:40:52, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1188/2500 [1:31:19<1:39:32, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1189/2500 [1:31:23<1:34:28, 4.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1190/2500 [1:31:27<1:37:06, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1191/2500 [1:31:32<1:40:43, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1192/2500 [1:31:37<1:39:21, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1193/2500 [1:31:41<1:37:58, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1194/2500 [1:31:46<1:37:26, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1195/2500 [1:31:49<1:32:35, 4.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1196/2500 [1:31:54<1:32:59, 4.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1197/2500 [1:32:00<1:47:03, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1198/2500 [1:32:04<1:42:08, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1199/2500 [1:32:09<1:44:25, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1200/2500 [1:32:15<1:49:55, 5.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1201/2500 [1:32:20<1:47:06, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1202/2500 [1:32:24<1:41:19, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1203/2500 [1:32:28<1:34:39, 4.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1204/2500 [1:32:32<1:36:41, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1205/2500 [1:32:37<1:37:24, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1206/2500 [1:32:41<1:35:50, 4.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1207/2500 [1:32:46<1:36:24, 4.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1208/2500 [1:32:51<1:41:21, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1209/2500 [1:32:55<1:36:08, 4.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1210/2500 [1:32:59<1:35:09, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1211/2500 [1:33:05<1:43:42, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1212/2500 [1:33:09<1:36:13, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▊ | 1213/2500 [1:33:15<1:47:11, 5.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▊ | 1214/2500 [1:33:20<1:49:13, 5.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", 
"Evaluating workflow: 49%|████▊ | 1215/2500 [1:33:25<1:47:34, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▊ | 1216/2500 [1:33:32<1:59:33, 5.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▊ | 1217/2500 [1:33:37<1:59:19, 5.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▊ | 1218/2500 [1:33:42<1:51:12, 5.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1219/2500 [1:33:45<1:41:08, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1220/2500 [1:33:50<1:40:55, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1221/2500 [1:33:54<1:37:49, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1222/2500 [1:34:01<1:52:50, 5.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1223/2500 
[1:34:05<1:43:31, 4.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1224/2500 [1:34:10<1:42:52, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1225/2500 [1:34:14<1:36:49, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1226/2500 [1:34:18<1:35:53, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1227/2500 [1:34:23<1:35:21, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1228/2500 [1:34:27<1:31:26, 4.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1229/2500 [1:34:32<1:39:26, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1230/2500 [1:34:37<1:39:47, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1231/2500 [1:34:42<1:44:17, 4.93s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1232/2500 [1:34:48<1:51:00, 5.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1233/2500 [1:34:54<1:50:55, 5.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1234/2500 [1:34:58<1:45:45, 5.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1235/2500 [1:35:04<1:50:58, 5.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1236/2500 [1:35:08<1:43:54, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1237/2500 [1:35:13<1:45:38, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1238/2500 [1:35:17<1:37:25, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1239/2500 [1:35:21<1:34:37, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1240/2500 [1:35:26<1:38:57, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1241/2500 [1:35:31<1:36:46, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1242/2500 [1:35:36<1:39:31, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1243/2500 [1:35:41<1:42:11, 4.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1244/2500 [1:35:45<1:37:34, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1245/2500 [1:35:50<1:35:46, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1246/2500 [1:35:57<1:53:10, 5.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1247/2500 [1:36:01<1:41:08, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" 
] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1248/2500 [1:36:06<1:43:30, 4.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1249/2500 [1:36:11<1:46:28, 5.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1250/2500 [1:36:15<1:39:13, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1251/2500 [1:36:20<1:39:05, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1252/2500 [1:36:27<1:52:56, 5.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1253/2500 [1:36:31<1:46:35, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1254/2500 [1:36:36<1:40:40, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1255/2500 [1:36:40<1:35:19, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1256/2500 [1:36:43<1:27:56, 4.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1257/2500 [1:36:49<1:41:44, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1258/2500 [1:36:54<1:37:06, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1259/2500 [1:37:00<1:45:32, 5.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1260/2500 [1:37:03<1:32:32, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1261/2500 [1:37:07<1:30:21, 4.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1262/2500 [1:37:11<1:29:58, 4.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1263/2500 [1:37:15<1:25:57, 4.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating 
workflow: 51%|█████ | 1264/2500 [1:37:20<1:31:18, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1265/2500 [1:37:24<1:29:20, 4.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1266/2500 [1:37:28<1:24:27, 4.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1267/2500 [1:37:32<1:27:24, 4.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1268/2500 [1:37:36<1:27:20, 4.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1269/2500 [1:37:41<1:26:36, 4.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1270/2500 [1:37:45<1:26:53, 4.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1271/2500 [1:37:49<1:28:00, 4.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1272/2500 
[1:37:54<1:29:05, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1273/2500 [1:37:59<1:33:42, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1274/2500 [1:38:04<1:35:27, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1275/2500 [1:38:08<1:33:06, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1276/2500 [1:38:12<1:29:12, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1277/2500 [1:38:16<1:29:23, 4.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1278/2500 [1:38:20<1:25:24, 4.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1279/2500 [1:38:23<1:17:16, 3.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1280/2500 [1:38:27<1:18:00, 3.84s/it]" ] }, { 
"name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1281/2500 [1:38:31<1:16:55, 3.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████▏ | 1282/2500 [1:38:34<1:16:04, 3.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████▏ | 1283/2500 [1:38:38<1:15:21, 3.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████▏ | 1284/2500 [1:38:42<1:18:17, 3.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████▏ | 1285/2500 [1:38:47<1:23:22, 4.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████▏ | 1286/2500 [1:38:50<1:18:09, 3.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████▏ | 1287/2500 [1:38:53<1:13:59, 3.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1288/2500 [1:38:58<1:19:38, 3.94s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1289/2500 [1:39:01<1:14:17, 3.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1290/2500 [1:39:06<1:24:22, 4.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1291/2500 [1:39:14<1:47:25, 5.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1292/2500 [1:39:19<1:46:00, 5.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1293/2500 [1:39:25<1:48:47, 5.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1294/2500 [1:39:29<1:40:08, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1295/2500 [1:39:34<1:41:48, 5.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1296/2500 [1:39:39<1:39:56, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics 
{'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1297/2500 [1:39:43<1:33:45, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1298/2500 [1:39:49<1:37:55, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1299/2500 [1:39:54<1:43:44, 5.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1300/2500 [1:39:58<1:33:53, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1301/2500 [1:40:01<1:23:31, 4.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1302/2500 [1:40:07<1:33:28, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1303/2500 [1:40:12<1:35:20, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1304/2500 [1:40:16<1:33:19, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1305/2500 [1:40:22<1:36:13, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1306/2500 [1:40:25<1:27:28, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1307/2500 [1:40:29<1:27:15, 4.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1308/2500 [1:40:33<1:20:53, 4.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1309/2500 [1:40:36<1:16:42, 3.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1310/2500 [1:40:41<1:25:17, 4.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1311/2500 [1:40:45<1:21:41, 4.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1312/2500 [1:40:49<1:22:07, 4.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": 
"stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1313/2500 [1:40:53<1:20:58, 4.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1314/2500 [1:40:57<1:16:37, 3.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1315/2500 [1:41:01<1:19:02, 4.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1316/2500 [1:41:04<1:15:42, 3.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1317/2500 [1:41:08<1:16:55, 3.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1318/2500 [1:41:13<1:22:01, 4.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1319/2500 [1:41:17<1:18:03, 3.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1320/2500 [1:41:21<1:20:57, 4.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1321/2500 [1:41:25<1:18:31, 4.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1322/2500 [1:41:30<1:23:34, 4.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1323/2500 [1:41:33<1:19:25, 4.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1324/2500 [1:41:38<1:21:04, 4.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1325/2500 [1:41:41<1:18:26, 4.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1326/2500 [1:41:46<1:20:55, 4.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1327/2500 [1:41:49<1:15:50, 3.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1328/2500 [1:41:54<1:21:52, 4.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating 
workflow: 53%|█████▎ | 1329/2500 [1:41:58<1:19:57, 4.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1330/2500 [1:42:03<1:26:38, 4.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1331/2500 [1:42:07<1:24:27, 4.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1332/2500 [1:42:12<1:29:39, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1333/2500 [1:42:16<1:21:08, 4.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1334/2500 [1:42:19<1:17:38, 4.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1335/2500 [1:42:25<1:28:15, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1336/2500 [1:42:28<1:21:53, 4.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 
1337/2500 [1:42:34<1:27:14, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▎ | 1338/2500 [1:42:37<1:22:56, 4.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▎ | 1339/2500 [1:42:41<1:17:14, 3.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▎ | 1340/2500 [1:42:44<1:12:34, 3.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▎ | 1341/2500 [1:42:48<1:12:24, 3.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▎ | 1342/2500 [1:42:51<1:08:06, 3.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▎ | 1343/2500 [1:42:58<1:28:11, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1344/2500 [1:43:02<1:28:51, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1345/2500 [1:43:06<1:23:01, 
4.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1346/2500 [1:43:10<1:19:44, 4.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1347/2500 [1:43:14<1:18:32, 4.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1348/2500 [1:43:18<1:19:33, 4.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1349/2500 [1:43:22<1:20:48, 4.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1350/2500 [1:43:26<1:17:11, 4.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1351/2500 [1:43:31<1:25:14, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1352/2500 [1:43:36<1:24:49, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1353/2500 [1:43:43<1:43:46, 5.43s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1354/2500 [1:43:47<1:32:57, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1355/2500 [1:43:51<1:30:11, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1356/2500 [1:43:56<1:30:06, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1357/2500 [1:44:02<1:36:43, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1358/2500 [1:44:06<1:32:45, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1359/2500 [1:44:11<1:28:29, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1360/2500 [1:44:15<1:27:16, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1361/2500 [1:44:19<1:25:19, 4.50s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1362/2500 [1:44:25<1:31:18, 4.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1363/2500 [1:44:29<1:25:22, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1364/2500 [1:44:34<1:28:01, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1365/2500 [1:44:38<1:27:54, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1366/2500 [1:44:45<1:38:26, 5.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1367/2500 [1:44:48<1:27:57, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1368/2500 [1:44:53<1:28:47, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1369/2500 [1:44:58<1:32:48, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics 
{'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1370/2500 [1:45:02<1:24:52, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1371/2500 [1:45:08<1:32:05, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1372/2500 [1:45:12<1:28:14, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1373/2500 [1:45:17<1:29:20, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1374/2500 [1:45:21<1:26:18, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1375/2500 [1:45:26<1:28:46, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1376/2500 [1:45:32<1:37:18, 5.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1377/2500 [1:45:36<1:29:14, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1378/2500 [1:45:42<1:33:59, 5.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1379/2500 [1:45:48<1:37:53, 5.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1380/2500 [1:45:56<1:53:51, 6.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1381/2500 [1:46:01<1:49:37, 5.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1382/2500 [1:46:07<1:48:42, 5.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1383/2500 [1:46:12<1:45:23, 5.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1384/2500 [1:46:17<1:40:02, 5.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1385/2500 [1:46:22<1:41:24, 5.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1386/2500 [1:46:27<1:38:06, 5.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1387/2500 [1:46:33<1:38:53, 5.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1388/2500 [1:46:37<1:32:39, 5.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1389/2500 [1:46:41<1:28:38, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1390/2500 [1:46:45<1:24:46, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1391/2500 [1:46:51<1:29:59, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1392/2500 [1:46:54<1:23:07, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1393/2500 [1:46:58<1:19:51, 4.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 56%|█████▌ | 1394/2500 [1:47:03<1:23:59, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1395/2500 [1:47:07<1:18:40, 4.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1396/2500 [1:47:11<1:18:09, 4.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1397/2500 [1:47:16<1:19:04, 4.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1398/2500 [1:47:20<1:16:10, 4.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1399/2500 [1:47:23<1:13:53, 4.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1400/2500 [1:47:27<1:12:29, 3.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1401/2500 [1:47:34<1:30:18, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
56%|█████▌ | 1402/2500 [1:47:39<1:31:08, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1403/2500 [1:47:43<1:25:56, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1404/2500 [1:47:48<1:23:45, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1405/2500 [1:47:53<1:27:52, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1406/2500 [1:47:58<1:28:04, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▋ | 1407/2500 [1:48:01<1:21:04, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▋ | 1408/2500 [1:48:06<1:19:34, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▋ | 1409/2500 [1:48:10<1:16:58, 4.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▋ | 1410/2500 
[1:48:15<1:21:05, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▋ | 1411/2500 [1:48:18<1:15:05, 4.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▋ | 1412/2500 [1:48:23<1:17:47, 4.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1413/2500 [1:48:29<1:27:05, 4.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1414/2500 [1:48:32<1:21:29, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1415/2500 [1:48:39<1:32:17, 5.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1416/2500 [1:48:43<1:27:43, 4.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1417/2500 [1:48:46<1:17:36, 4.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1418/2500 [1:48:53<1:28:30, 4.91s/it]" ] }, { 
"name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1419/2500 [1:48:58<1:31:22, 5.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1420/2500 [1:49:01<1:21:05, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1421/2500 [1:49:05<1:16:06, 4.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1422/2500 [1:49:09<1:14:08, 4.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1423/2500 [1:49:12<1:11:20, 3.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1424/2500 [1:49:16<1:12:08, 4.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1425/2500 [1:49:22<1:19:59, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1426/2500 [1:49:27<1:25:15, 4.76s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1427/2500 [1:49:33<1:31:22, 5.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1428/2500 [1:49:39<1:35:43, 5.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1429/2500 [1:49:44<1:30:46, 5.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1430/2500 [1:49:52<1:47:51, 6.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1431/2500 [1:49:56<1:38:17, 5.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1432/2500 [1:50:01<1:35:12, 5.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1433/2500 [1:50:07<1:37:07, 5.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1434/2500 [1:50:11<1:28:28, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 
'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1435/2500 [1:50:15<1:22:50, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1436/2500 [1:50:19<1:19:10, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1437/2500 [1:50:23<1:18:31, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1438/2500 [1:50:27<1:17:39, 4.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1439/2500 [1:50:34<1:29:31, 5.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1440/2500 [1:50:39<1:26:48, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1441/2500 [1:50:45<1:33:28, 5.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1442/2500 [1:50:51<1:39:40, 5.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1443/2500 [1:50:58<1:45:22, 5.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1444/2500 [1:51:03<1:40:46, 5.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1445/2500 [1:51:08<1:36:11, 5.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1446/2500 [1:51:13<1:31:59, 5.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1447/2500 [1:51:18<1:33:05, 5.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1448/2500 [1:51:23<1:28:50, 5.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1449/2500 [1:51:27<1:27:17, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1450/2500 [1:51:34<1:38:07, 5.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1451/2500 [1:51:39<1:29:53, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1452/2500 [1:51:43<1:23:45, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1453/2500 [1:51:46<1:19:16, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1454/2500 [1:51:53<1:31:55, 5.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1455/2500 [1:51:59<1:34:40, 5.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1456/2500 [1:52:05<1:37:17, 5.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1457/2500 [1:52:09<1:29:11, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1458/2500 [1:52:13<1:22:31, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating 
workflow: 58%|█████▊ | 1459/2500 [1:52:19<1:26:00, 4.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1460/2500 [1:52:22<1:19:46, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1461/2500 [1:52:27<1:22:26, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1462/2500 [1:52:32<1:19:18, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▊ | 1463/2500 [1:52:35<1:15:12, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▊ | 1464/2500 [1:52:42<1:26:06, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▊ | 1465/2500 [1:52:47<1:24:14, 4.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▊ | 1466/2500 [1:52:51<1:20:16, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▊ | 1467/2500 
[1:52:58<1:33:48, 5.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▊ | 1468/2500 [1:53:04<1:37:02, 5.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1469/2500 [1:53:10<1:36:45, 5.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1470/2500 [1:53:15<1:36:54, 5.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1471/2500 [1:53:20<1:29:49, 5.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1472/2500 [1:53:26<1:33:10, 5.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1473/2500 [1:53:31<1:33:01, 5.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1474/2500 [1:53:36<1:28:58, 5.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1475/2500 [1:53:40<1:26:51, 5.08s/it]" ] }, { 
"name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1476/2500 [1:53:46<1:29:05, 5.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1477/2500 [1:53:51<1:27:39, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1478/2500 [1:53:55<1:21:30, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1479/2500 [1:54:00<1:25:18, 5.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1480/2500 [1:54:04<1:19:09, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1481/2500 [1:54:10<1:25:34, 5.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1482/2500 [1:54:14<1:19:15, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1483/2500 [1:54:18<1:15:36, 4.46s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1484/2500 [1:54:22<1:13:38, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1485/2500 [1:54:26<1:10:18, 4.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1486/2500 [1:54:30<1:12:28, 4.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1487/2500 [1:54:35<1:13:56, 4.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1488/2500 [1:54:39<1:14:14, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1489/2500 [1:54:44<1:13:40, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1490/2500 [1:54:48<1:12:48, 4.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1491/2500 [1:54:55<1:25:19, 5.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 
0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1492/2500 [1:54:59<1:20:20, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1493/2500 [1:55:05<1:25:02, 5.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1494/2500 [1:55:09<1:19:40, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1495/2500 [1:55:15<1:27:41, 5.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1496/2500 [1:55:19<1:22:24, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1497/2500 [1:55:25<1:27:22, 5.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1498/2500 [1:55:28<1:17:41, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1499/2500 [1:55:32<1:11:39, 4.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1500/2500 [1:55:37<1:14:04, 4.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1501/2500 [1:55:41<1:11:16, 4.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1502/2500 [1:55:46<1:14:21, 4.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1503/2500 [1:55:51<1:17:50, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1504/2500 [1:55:56<1:21:46, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1505/2500 [1:56:02<1:27:23, 5.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1506/2500 [1:56:07<1:25:44, 5.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1507/2500 [1:56:13<1:27:53, 5.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1508/2500 [1:56:19<1:31:24, 5.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1509/2500 [1:56:24<1:28:46, 5.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1510/2500 [1:56:30<1:30:26, 5.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1511/2500 [1:56:34<1:22:43, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1512/2500 [1:56:38<1:19:13, 4.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1513/2500 [1:56:45<1:29:35, 5.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1514/2500 [1:56:50<1:26:41, 5.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1515/2500 [1:56:55<1:26:15, 5.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating 
workflow: 61%|██████ | 1516/2500 [1:56:59<1:21:31, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1517/2500 [1:57:03<1:16:19, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1518/2500 [1:57:09<1:19:53, 4.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1519/2500 [1:57:14<1:21:37, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1520/2500 [1:57:19<1:22:57, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1521/2500 [1:57:24<1:21:39, 5.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1522/2500 [1:57:30<1:29:16, 5.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1523/2500 [1:57:34<1:21:54, 5.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1524/2500 
[1:57:39<1:20:36, 4.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1525/2500 [1:57:44<1:20:44, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1526/2500 [1:57:47<1:12:09, 4.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1527/2500 [1:57:51<1:08:43, 4.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1528/2500 [1:57:58<1:23:03, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1529/2500 [1:58:02<1:17:23, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1530/2500 [1:58:07<1:14:47, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1531/2500 [1:58:11<1:11:33, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████▏ | 1532/2500 [1:58:15<1:11:53, 4.46s/it]" ] }, { 
"name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████▏ | 1533/2500 [1:58:21<1:16:30, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████▏ | 1534/2500 [1:58:25<1:12:43, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████▏ | 1535/2500 [1:58:29<1:09:52, 4.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████▏ | 1536/2500 [1:58:32<1:05:51, 4.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████▏ | 1537/2500 [1:58:38<1:14:05, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1538/2500 [1:58:43<1:16:33, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1539/2500 [1:58:47<1:13:33, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1540/2500 [1:58:53<1:20:23, 5.02s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1541/2500 [1:58:58<1:18:30, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1542/2500 [1:59:05<1:28:59, 5.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1543/2500 [1:59:09<1:21:18, 5.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1544/2500 [1:59:12<1:13:23, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1545/2500 [1:59:16<1:10:33, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1546/2500 [1:59:20<1:08:06, 4.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1547/2500 [1:59:23<1:02:25, 3.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1548/2500 [1:59:30<1:14:04, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1549/2500 [1:59:34<1:10:58, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1550/2500 [1:59:38<1:06:52, 4.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1551/2500 [1:59:41<1:02:56, 3.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1552/2500 [1:59:45<1:03:44, 4.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1553/2500 [1:59:50<1:09:52, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1554/2500 [1:59:57<1:22:05, 5.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1555/2500 [2:00:02<1:19:28, 5.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1556/2500 [2:00:07<1:16:45, 4.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 
1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1557/2500 [2:00:12<1:20:33, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1558/2500 [2:00:17<1:18:47, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1559/2500 [2:00:22<1:17:10, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1560/2500 [2:00:28<1:21:09, 5.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1561/2500 [2:00:33<1:22:39, 5.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1562/2500 [2:00:39<1:23:38, 5.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1563/2500 [2:00:43<1:20:33, 5.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1564/2500 [2:00:47<1:14:18, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1565/2500 [2:00:51<1:12:01, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1566/2500 [2:00:57<1:14:20, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1567/2500 [2:01:00<1:07:18, 4.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1568/2500 [2:01:06<1:14:05, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1569/2500 [2:01:12<1:23:21, 5.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1570/2500 [2:01:16<1:15:49, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1571/2500 [2:01:22<1:20:00, 5.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1572/2500 [2:01:26<1:14:52, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": 
"stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1573/2500 [2:01:30<1:10:03, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1574/2500 [2:01:34<1:08:09, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1575/2500 [2:01:39<1:11:32, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1576/2500 [2:01:44<1:11:47, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1577/2500 [2:01:49<1:14:20, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1578/2500 [2:01:54<1:12:59, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1579/2500 [2:01:58<1:09:44, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1580/2500 [2:02:04<1:17:46, 5.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1581/2500 [2:02:08<1:13:32, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1582/2500 [2:02:14<1:17:44, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1583/2500 [2:02:18<1:11:23, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1584/2500 [2:02:22<1:09:29, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1585/2500 [2:02:26<1:05:32, 4.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1586/2500 [2:02:29<1:00:31, 3.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1587/2500 [2:02:34<1:04:55, 4.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▎ | 1588/2500 [2:02:38<1:05:28, 4.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", 
"Evaluating workflow: 64%|██████▎ | 1589/2500 [2:02:43<1:05:09, 4.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▎ | 1590/2500 [2:02:48<1:08:17, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▎ | 1591/2500 [2:02:51<1:03:46, 4.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▎ | 1592/2500 [2:02:56<1:05:57, 4.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▎ | 1593/2500 [2:03:00<1:06:14, 4.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1594/2500 [2:03:05<1:07:14, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1595/2500 [2:03:08<1:00:28, 4.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1596/2500 [2:03:12<1:00:27, 4.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
64%|██████▍ | 1597/2500 [2:03:15<57:27, 3.82s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1598/2500 [2:03:18<55:07, 3.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1599/2500 [2:03:22<54:20, 3.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1600/2500 [2:03:27<59:43, 3.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1601/2500 [2:03:30<55:21, 3.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1602/2500 [2:03:35<1:04:00, 4.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1603/2500 [2:03:42<1:12:41, 4.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1604/2500 [2:03:45<1:06:04, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1605/2500 
[2:03:49<1:02:19, 4.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1606/2500 [2:03:52<1:00:28, 4.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1607/2500 [2:03:57<1:01:43, 4.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1608/2500 [2:04:02<1:04:52, 4.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1609/2500 [2:04:07<1:07:17, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1610/2500 [2:04:11<1:04:22, 4.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1611/2500 [2:04:14<58:43, 3.96s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1612/2500 [2:04:18<59:34, 4.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1613/2500 [2:04:22<59:00, 3.99s/it]" ] 
}, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1614/2500 [2:04:26<58:13, 3.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1615/2500 [2:04:31<1:07:02, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1616/2500 [2:04:36<1:08:31, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1617/2500 [2:04:40<1:03:44, 4.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1618/2500 [2:04:44<1:02:29, 4.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1619/2500 [2:04:49<1:07:26, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1620/2500 [2:04:56<1:14:07, 5.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1621/2500 [2:05:00<1:10:09, 4.79s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1622/2500 [2:05:07<1:19:49, 5.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1623/2500 [2:05:13<1:22:45, 5.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1624/2500 [2:05:19<1:24:45, 5.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1625/2500 [2:05:23<1:16:01, 5.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1626/2500 [2:05:30<1:26:02, 5.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1627/2500 [2:05:34<1:15:59, 5.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1628/2500 [2:05:39<1:15:54, 5.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1629/2500 [2:05:44<1:15:43, 5.22s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1630/2500 [2:05:49<1:11:02, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1631/2500 [2:05:53<1:06:53, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1632/2500 [2:05:57<1:04:12, 4.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1633/2500 [2:06:01<1:04:18, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1634/2500 [2:06:05<1:02:50, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1635/2500 [2:06:10<1:05:18, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1636/2500 [2:06:14<1:02:41, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1637/2500 [2:06:20<1:08:48, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 
0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1638/2500 [2:06:23<1:02:57, 4.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1639/2500 [2:06:30<1:13:27, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1640/2500 [2:06:36<1:16:55, 5.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1641/2500 [2:06:43<1:23:30, 5.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1642/2500 [2:06:47<1:16:04, 5.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1643/2500 [2:06:54<1:23:18, 5.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1644/2500 [2:06:59<1:18:26, 5.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1645/2500 [2:07:03<1:12:13, 5.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" 
] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1646/2500 [2:07:07<1:06:47, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1647/2500 [2:07:12<1:07:34, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1648/2500 [2:07:15<1:01:44, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1649/2500 [2:07:20<1:02:24, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1650/2500 [2:07:25<1:05:07, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1651/2500 [2:07:30<1:08:12, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1652/2500 [2:07:35<1:10:55, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1653/2500 [2:07:41<1:13:58, 5.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": 
"stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1654/2500 [2:07:47<1:14:47, 5.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1655/2500 [2:07:52<1:15:56, 5.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1656/2500 [2:07:56<1:08:44, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▋ | 1657/2500 [2:08:01<1:08:21, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▋ | 1658/2500 [2:08:05<1:03:51, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▋ | 1659/2500 [2:08:09<1:03:54, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▋ | 1660/2500 [2:08:14<1:03:08, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▋ | 1661/2500 [2:08:18<1:02:23, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 66%|██████▋ | 1662/2500 [2:08:23<1:06:10, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1663/2500 [2:08:28<1:05:05, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1664/2500 [2:08:32<1:03:02, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1665/2500 [2:08:36<59:34, 4.28s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1666/2500 [2:08:39<55:26, 3.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1667/2500 [2:08:44<1:01:36, 4.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1668/2500 [2:08:48<57:59, 4.18s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1669/2500 [2:08:54<1:06:26, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", 
"Evaluating workflow: 67%|██████▋ | 1670/2500 [2:09:00<1:09:07, 5.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1671/2500 [2:09:04<1:05:18, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1672/2500 [2:09:08<1:01:05, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1673/2500 [2:09:14<1:10:40, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1674/2500 [2:09:19<1:09:03, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1675/2500 [2:09:23<1:04:08, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1676/2500 [2:09:26<58:15, 4.24s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1677/2500 [2:09:30<55:25, 4.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
67%|██████▋ | 1678/2500 [2:09:33<53:22, 3.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1679/2500 [2:09:38<55:44, 4.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1680/2500 [2:09:41<53:01, 3.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1681/2500 [2:09:46<54:47, 4.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1682/2500 [2:09:51<1:00:22, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1683/2500 [2:09:57<1:05:47, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1684/2500 [2:10:03<1:11:37, 5.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1685/2500 [2:10:09<1:15:59, 5.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1686/2500 
[2:10:13<1:09:31, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1687/2500 [2:10:17<1:02:38, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1688/2500 [2:10:22<1:05:53, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1689/2500 [2:10:27<1:03:48, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1690/2500 [2:10:31<1:00:57, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1691/2500 [2:10:35<59:57, 4.45s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1692/2500 [2:10:41<1:06:44, 4.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1693/2500 [2:10:49<1:16:47, 5.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1694/2500 [2:10:52<1:07:32, 5.03s/it]" ] 
}, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1695/2500 [2:10:58<1:09:40, 5.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1696/2500 [2:11:03<1:10:40, 5.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1697/2500 [2:11:07<1:06:23, 4.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1698/2500 [2:11:11<1:00:21, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1699/2500 [2:11:14<56:59, 4.27s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1700/2500 [2:11:20<1:01:27, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1701/2500 [2:11:23<55:36, 4.18s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1702/2500 [2:11:26<51:31, 3.87s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1703/2500 [2:11:30<49:39, 3.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1704/2500 [2:11:33<48:52, 3.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1705/2500 [2:11:38<54:44, 4.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1706/2500 [2:11:42<51:31, 3.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1707/2500 [2:11:46<51:05, 3.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1708/2500 [2:11:51<57:37, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1709/2500 [2:11:56<59:58, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1710/2500 [2:12:01<59:37, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1711/2500 [2:12:07<1:06:04, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1712/2500 [2:12:10<58:29, 4.45s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▊ | 1713/2500 [2:12:16<1:03:13, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▊ | 1714/2500 [2:12:20<1:01:08, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▊ | 1715/2500 [2:12:25<1:02:39, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▊ | 1716/2500 [2:12:28<56:01, 4.29s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▊ | 1717/2500 [2:12:31<52:38, 4.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▊ | 1718/2500 [2:12:34<48:33, 3.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 
'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 1719/2500 [2:12:39<52:56, 4.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 1720/2500 [2:12:44<54:19, 4.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 1721/2500 [2:12:49<57:49, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 1722/2500 [2:12:53<55:38, 4.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 1723/2500 [2:12:57<56:31, 4.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 1724/2500 [2:13:01<55:11, 4.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 1725/2500 [2:13:07<1:01:08, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 1726/2500 [2:13:12<1:00:33, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 1727/2500 [2:13:16<57:17, 4.45s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 1728/2500 [2:13:21<1:00:18, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 1729/2500 [2:13:25<57:38, 4.49s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 1730/2500 [2:13:31<1:02:20, 4.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 1731/2500 [2:13:36<1:04:25, 5.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 1732/2500 [2:13:40<1:00:28, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 1733/2500 [2:13:45<1:02:43, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 1734/2500 [2:13:51<1:04:28, 5.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 69%|██████▉ | 1735/2500 [2:13:56<1:04:08, 5.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 1736/2500 [2:14:01<1:03:17, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 1737/2500 [2:14:06<1:05:25, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 1738/2500 [2:14:12<1:07:05, 5.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 1739/2500 [2:14:15<1:01:09, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 1740/2500 [2:14:22<1:06:35, 5.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 1741/2500 [2:14:27<1:05:16, 5.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 1742/2500 [2:14:31<1:01:08, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating 
workflow: 70%|██████▉ | 1743/2500 [2:14:37<1:06:59, 5.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 1744/2500 [2:14:44<1:11:22, 5.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 1745/2500 [2:14:49<1:11:01, 5.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 1746/2500 [2:14:54<1:07:41, 5.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 1747/2500 [2:14:58<1:03:01, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 1748/2500 [2:15:02<58:56, 4.70s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 1749/2500 [2:15:07<57:38, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 1750/2500 [2:15:10<54:41, 4.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 1751/2500 
[2:15:16<1:00:07, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 1752/2500 [2:15:22<1:02:13, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 1753/2500 [2:15:26<1:00:07, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 1754/2500 [2:15:32<1:04:05, 5.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 1755/2500 [2:15:37<1:01:39, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 1756/2500 [2:15:42<1:01:48, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 1757/2500 [2:15:48<1:05:28, 5.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 1758/2500 [2:15:51<1:00:14, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 1759/2500 [2:15:55<55:42, 4.51s/it] " ] 
}, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 1760/2500 [2:15:58<50:34, 4.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 1761/2500 [2:16:03<52:34, 4.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 1762/2500 [2:16:07<52:04, 4.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1763/2500 [2:16:12<53:39, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1764/2500 [2:16:17<55:48, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1765/2500 [2:16:21<55:50, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1766/2500 [2:16:26<56:18, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1767/2500 [2:16:30<52:14, 4.28s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1768/2500 [2:16:35<55:34, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1769/2500 [2:16:40<57:36, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1770/2500 [2:16:44<55:01, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1771/2500 [2:16:47<48:48, 4.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1772/2500 [2:16:52<52:00, 4.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1773/2500 [2:16:55<50:03, 4.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1774/2500 [2:17:00<50:41, 4.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1775/2500 [2:17:04<50:39, 4.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 
'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1776/2500 [2:17:08<49:42, 4.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1777/2500 [2:17:12<48:26, 4.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1778/2500 [2:17:16<48:14, 4.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1779/2500 [2:17:19<46:59, 3.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1780/2500 [2:17:23<45:49, 3.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1781/2500 [2:17:27<46:23, 3.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████▏ | 1782/2500 [2:17:34<57:43, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████▏ | 1783/2500 [2:17:38<56:20, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████▏ | 1784/2500 [2:17:43<54:32, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████▏ | 1785/2500 [2:17:48<57:56, 4.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████▏ | 1786/2500 [2:17:53<57:39, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████▏ | 1787/2500 [2:17:56<50:38, 4.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1788/2500 [2:17:59<47:15, 3.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1789/2500 [2:18:03<47:14, 3.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1790/2500 [2:18:08<50:46, 4.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1791/2500 [2:18:13<51:38, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1792/2500 [2:18:19<56:40, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1793/2500 [2:18:25<1:00:23, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1794/2500 [2:18:30<1:00:45, 5.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1795/2500 [2:18:35<1:00:50, 5.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1796/2500 [2:18:39<56:48, 4.84s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1797/2500 [2:18:45<59:01, 5.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1798/2500 [2:18:54<1:13:58, 6.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1799/2500 [2:19:05<1:29:45, 7.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 72%|███████▏ | 1800/2500 [2:19:11<1:25:50, 7.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1801/2500 [2:19:15<1:12:48, 6.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1802/2500 [2:19:19<1:04:35, 5.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1803/2500 [2:19:23<59:26, 5.12s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1804/2500 [2:19:28<57:47, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1805/2500 [2:19:32<54:59, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1806/2500 [2:19:37<57:14, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1807/2500 [2:19:42<55:18, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
72%|███████▏ | 1808/2500 [2:19:46<52:30, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1809/2500 [2:19:50<50:20, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1810/2500 [2:19:54<50:38, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1811/2500 [2:19:58<49:24, 4.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1812/2500 [2:20:03<52:13, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1813/2500 [2:20:08<52:31, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1814/2500 [2:20:12<51:50, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1815/2500 [2:20:17<53:14, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1816/2500 
[2:20:21<49:57, 4.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1817/2500 [2:20:27<53:20, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1818/2500 [2:20:34<1:03:00, 5.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1819/2500 [2:20:38<58:59, 5.20s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1820/2500 [2:20:43<58:18, 5.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1821/2500 [2:20:51<1:04:44, 5.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1822/2500 [2:20:57<1:07:04, 5.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1823/2500 [2:21:07<1:20:04, 7.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1824/2500 [2:21:14<1:19:57, 
7.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1825/2500 [2:21:19<1:13:10, 6.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1826/2500 [2:21:24<1:08:38, 6.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1827/2500 [2:21:29<1:02:46, 5.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1828/2500 [2:21:34<1:03:38, 5.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1829/2500 [2:21:39<1:00:34, 5.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1830/2500 [2:21:44<58:04, 5.20s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1831/2500 [2:21:50<1:00:14, 5.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1832/2500 [2:21:56<1:01:52, 5.56s/it]" ] }, { 
"name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1833/2500 [2:22:01<59:58, 5.39s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1834/2500 [2:22:07<1:02:08, 5.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1835/2500 [2:22:15<1:11:16, 6.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1836/2500 [2:22:20<1:05:55, 5.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1837/2500 [2:22:25<1:03:13, 5.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▎ | 1838/2500 [2:22:29<57:39, 5.23s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▎ | 1839/2500 [2:22:37<1:04:35, 5.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▎ | 1840/2500 [2:22:45<1:13:28, 6.68s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▎ | 1841/2500 [2:22:52<1:12:57, 6.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▎ | 1842/2500 [2:22:59<1:13:20, 6.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▎ | 1843/2500 [2:23:06<1:15:21, 6.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1844/2500 [2:23:12<1:11:01, 6.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1845/2500 [2:23:17<1:07:22, 6.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1846/2500 [2:23:23<1:07:23, 6.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1847/2500 [2:23:30<1:08:43, 6.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1848/2500 [2:23:35<1:06:00, 6.07s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1849/2500 [2:23:38<56:23, 5.20s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1850/2500 [2:23:46<1:04:24, 5.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1851/2500 [2:23:53<1:07:51, 6.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1852/2500 [2:24:00<1:09:53, 6.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1853/2500 [2:24:06<1:06:55, 6.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1854/2500 [2:24:11<1:04:42, 6.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1855/2500 [2:24:15<57:22, 5.34s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1856/2500 [2:24:20<54:31, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1857/2500 [2:24:24<52:59, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1858/2500 [2:24:29<52:42, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1859/2500 [2:24:33<48:19, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1860/2500 [2:24:37<49:16, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1861/2500 [2:24:43<51:53, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1862/2500 [2:24:49<55:21, 5.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 1863/2500 [2:24:53<53:01, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 1864/2500 [2:24:58<52:13, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 1865/2500 [2:25:05<57:14, 5.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 1866/2500 [2:25:11<1:00:44, 5.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 1867/2500 [2:25:17<1:01:19, 5.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 1868/2500 [2:25:23<1:00:24, 5.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 1869/2500 [2:25:27<55:20, 5.26s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 1870/2500 [2:25:34<1:00:54, 5.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 1871/2500 [2:25:39<57:47, 5.51s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 1872/2500 [2:25:45<1:01:06, 5.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": 
"stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 1873/2500 [2:25:50<56:54, 5.45s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 1874/2500 [2:25:55<54:07, 5.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 1875/2500 [2:26:00<54:41, 5.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 1876/2500 [2:26:06<55:43, 5.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 1877/2500 [2:26:09<51:07, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 1878/2500 [2:26:16<57:40, 5.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 1879/2500 [2:26:21<54:25, 5.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 1880/2500 [2:26:27<56:52, 5.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 75%|███████▌ | 1881/2500 [2:26:32<54:24, 5.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 1882/2500 [2:26:35<49:12, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 1883/2500 [2:26:41<49:56, 4.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 1884/2500 [2:26:45<49:27, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 1885/2500 [2:26:50<48:42, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 1886/2500 [2:26:53<44:23, 4.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 1887/2500 [2:26:57<42:59, 4.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 1888/2500 [2:27:02<46:05, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating 
workflow: 76%|███████▌ | 1889/2500 [2:27:09<51:24, 5.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 1890/2500 [2:27:14<50:46, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 1891/2500 [2:27:18<50:34, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 1892/2500 [2:27:23<49:53, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 1893/2500 [2:27:28<48:33, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 1894/2500 [2:27:31<44:05, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 1895/2500 [2:27:36<45:08, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 1896/2500 [2:27:42<49:15, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 
1897/2500 [2:27:46<48:19, 4.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 1898/2500 [2:27:50<44:51, 4.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 1899/2500 [2:27:54<44:48, 4.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 1900/2500 [2:28:02<53:06, 5.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 1901/2500 [2:28:06<50:03, 5.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 1902/2500 [2:28:10<46:44, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 1903/2500 [2:28:16<50:01, 5.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 1904/2500 [2:28:23<55:04, 5.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 1905/2500 [2:28:30<1:00:24, 
6.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 1906/2500 [2:28:37<1:02:41, 6.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▋ | 1907/2500 [2:28:43<1:02:45, 6.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▋ | 1908/2500 [2:28:49<1:01:39, 6.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▋ | 1909/2500 [2:28:55<59:59, 6.09s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▋ | 1910/2500 [2:29:00<56:59, 5.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▋ | 1911/2500 [2:29:06<56:13, 5.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▋ | 1912/2500 [2:29:09<50:01, 5.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1913/2500 [2:29:16<53:27, 5.46s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1914/2500 [2:29:21<52:18, 5.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1915/2500 [2:29:25<49:45, 5.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1916/2500 [2:29:31<51:42, 5.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1917/2500 [2:29:40<1:01:10, 6.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1918/2500 [2:29:46<1:00:39, 6.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1919/2500 [2:29:52<1:00:21, 6.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1920/2500 [2:29:58<58:58, 6.10s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1921/2500 [2:30:02<53:31, 5.55s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1922/2500 [2:30:09<57:58, 6.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1923/2500 [2:30:15<58:10, 6.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1924/2500 [2:30:20<53:54, 5.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1925/2500 [2:30:25<53:46, 5.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1926/2500 [2:30:30<50:42, 5.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1927/2500 [2:30:36<53:15, 5.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1928/2500 [2:30:40<49:05, 5.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1929/2500 [2:30:46<49:44, 5.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics 
{'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1930/2500 [2:30:51<48:48, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1931/2500 [2:30:54<44:01, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1932/2500 [2:30:59<44:45, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1933/2500 [2:31:06<50:15, 5.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1934/2500 [2:31:12<53:29, 5.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1935/2500 [2:31:17<49:18, 5.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1936/2500 [2:31:23<52:27, 5.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1937/2500 [2:31:27<48:19, 5.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" 
] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1938/2500 [2:31:32<48:17, 5.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1939/2500 [2:31:38<50:38, 5.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1940/2500 [2:31:47<59:44, 6.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1941/2500 [2:31:54<1:00:41, 6.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1942/2500 [2:32:01<1:01:51, 6.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1943/2500 [2:32:06<58:21, 6.29s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1944/2500 [2:32:13<58:29, 6.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1945/2500 [2:32:17<53:34, 5.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1946/2500 [2:32:23<53:58, 5.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1947/2500 [2:32:27<48:42, 5.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1948/2500 [2:32:32<47:22, 5.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1949/2500 [2:32:37<46:36, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1950/2500 [2:32:40<41:41, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1951/2500 [2:32:47<46:44, 5.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1952/2500 [2:32:52<46:57, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1953/2500 [2:32:56<45:33, 5.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 78%|███████▊ | 1954/2500 [2:33:02<47:20, 5.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1955/2500 [2:33:08<47:51, 5.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1956/2500 [2:33:11<43:39, 4.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1957/2500 [2:33:16<43:33, 4.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1958/2500 [2:33:20<42:19, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1959/2500 [2:33:27<47:07, 5.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1960/2500 [2:33:31<43:29, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1961/2500 [2:33:36<44:39, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
78%|███████▊ | 1962/2500 [2:33:42<47:59, 5.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▊ | 1963/2500 [2:33:49<52:12, 5.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▊ | 1964/2500 [2:33:54<48:59, 5.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▊ | 1965/2500 [2:33:59<48:33, 5.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▊ | 1966/2500 [2:34:06<50:46, 5.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▊ | 1967/2500 [2:34:12<52:50, 5.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▊ | 1968/2500 [2:34:18<52:20, 5.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 1969/2500 [2:34:23<49:27, 5.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 1970/2500 
[2:34:30<53:06, 6.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 1971/2500 [2:34:35<51:48, 5.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 1972/2500 [2:34:41<51:04, 5.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 1973/2500 [2:34:47<51:45, 5.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 1974/2500 [2:34:51<46:55, 5.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 1975/2500 [2:34:55<42:08, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 1976/2500 [2:34:59<40:00, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 1977/2500 [2:35:04<41:53, 4.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 1978/2500 [2:35:11<47:10, 5.42s/it]" ] }, { 
"name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 1979/2500 [2:35:17<47:46, 5.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 1980/2500 [2:35:28<1:03:10, 7.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 1981/2500 [2:35:34<1:00:32, 7.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 1982/2500 [2:35:38<51:16, 5.94s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 1983/2500 [2:35:43<47:44, 5.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 1984/2500 [2:35:48<48:20, 5.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 1985/2500 [2:35:53<46:31, 5.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 1986/2500 [2:35:57<41:38, 4.86s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 1987/2500 [2:36:04<48:11, 5.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 1988/2500 [2:36:11<50:33, 5.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 1989/2500 [2:36:16<48:10, 5.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 1990/2500 [2:36:20<42:48, 5.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 1991/2500 [2:36:27<47:36, 5.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 1992/2500 [2:36:31<45:31, 5.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 1993/2500 [2:36:36<43:22, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 1994/2500 [2:36:42<45:00, 5.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 
1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 1995/2500 [2:36:46<41:23, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 1996/2500 [2:36:51<41:52, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 1997/2500 [2:36:57<45:22, 5.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 1998/2500 [2:37:01<42:11, 5.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 1999/2500 [2:37:06<41:14, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2000/2500 [2:37:09<35:59, 4.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2001/2500 [2:37:15<41:22, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2002/2500 [2:37:21<43:29, 5.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2003/2500 [2:37:27<44:05, 5.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2004/2500 [2:37:35<51:05, 6.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2005/2500 [2:37:42<52:10, 6.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2006/2500 [2:37:47<48:36, 5.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2007/2500 [2:37:50<41:24, 5.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2008/2500 [2:37:55<41:04, 5.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2009/2500 [2:38:01<44:16, 5.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2010/2500 [2:38:06<44:17, 5.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2011/2500 [2:38:12<43:37, 5.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2012/2500 [2:38:16<41:35, 5.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2013/2500 [2:38:22<44:14, 5.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2014/2500 [2:38:26<40:16, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2015/2500 [2:38:33<43:31, 5.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2016/2500 [2:38:37<41:59, 5.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2017/2500 [2:38:41<37:44, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2018/2500 [2:38:47<40:23, 5.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", 
"Evaluating workflow: 81%|████████ | 2019/2500 [2:38:53<42:22, 5.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2020/2500 [2:38:55<36:35, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2021/2500 [2:38:59<34:16, 4.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2022/2500 [2:39:05<37:14, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2023/2500 [2:39:10<38:17, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2024/2500 [2:39:15<40:13, 5.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2025/2500 [2:39:21<42:16, 5.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2026/2500 [2:39:26<39:18, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
81%|████████ | 2027/2500 [2:39:32<42:52, 5.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2028/2500 [2:39:35<37:57, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2029/2500 [2:39:39<34:58, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2030/2500 [2:39:42<32:09, 4.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2031/2500 [2:39:46<30:09, 3.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████▏ | 2032/2500 [2:39:51<33:10, 4.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████▏ | 2033/2500 [2:39:54<31:13, 4.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████▏ | 2034/2500 [2:40:00<34:11, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████▏ | 2035/2500 
[2:40:05<36:08, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████▏ | 2036/2500 [2:40:09<35:57, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████▏ | 2037/2500 [2:40:15<36:48, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2038/2500 [2:40:19<36:44, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2039/2500 [2:40:25<39:03, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2040/2500 [2:40:29<35:22, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2041/2500 [2:40:35<40:04, 5.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2042/2500 [2:40:39<36:13, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2043/2500 [2:40:42<33:20, 4.38s/it]" ] 
}, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2044/2500 [2:40:48<35:04, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2045/2500 [2:40:54<37:59, 5.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2046/2500 [2:40:59<39:39, 5.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2047/2500 [2:41:04<39:15, 5.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2048/2500 [2:41:08<36:10, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2049/2500 [2:41:11<32:31, 4.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2050/2500 [2:41:17<34:56, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2051/2500 [2:41:22<35:49, 4.79s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2052/2500 [2:41:29<39:39, 5.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2053/2500 [2:41:32<36:02, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2054/2500 [2:41:36<34:29, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2055/2500 [2:41:40<31:23, 4.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2056/2500 [2:41:43<30:05, 4.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2057/2500 [2:41:48<30:56, 4.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2058/2500 [2:41:55<37:45, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2059/2500 [2:42:00<37:13, 5.07s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2060/2500 [2:42:04<34:16, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2061/2500 [2:42:08<33:38, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2062/2500 [2:42:12<32:22, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2063/2500 [2:42:16<31:16, 4.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2064/2500 [2:42:19<28:43, 3.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2065/2500 [2:42:23<28:11, 3.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2066/2500 [2:42:27<27:23, 3.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2067/2500 [2:42:30<25:50, 3.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2068/2500 [2:42:34<27:13, 3.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2069/2500 [2:42:38<28:00, 3.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2070/2500 [2:42:44<32:14, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2071/2500 [2:42:48<30:48, 4.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2072/2500 [2:42:51<27:00, 3.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2073/2500 [2:42:56<30:30, 4.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2074/2500 [2:43:01<31:19, 4.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2075/2500 [2:43:06<33:00, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2076/2500 [2:43:10<31:34, 4.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2077/2500 [2:43:15<32:13, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2078/2500 [2:43:19<31:44, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2079/2500 [2:43:25<33:27, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2080/2500 [2:43:30<33:41, 4.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2081/2500 [2:43:34<33:02, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2082/2500 [2:43:39<34:22, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2083/2500 [2:43:43<32:04, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": 
"stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2084/2500 [2:43:48<32:03, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2085/2500 [2:43:54<35:10, 5.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2086/2500 [2:43:59<35:12, 5.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2087/2500 [2:44:02<31:12, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▎ | 2088/2500 [2:44:08<32:43, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▎ | 2089/2500 [2:44:11<30:12, 4.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▎ | 2090/2500 [2:44:17<32:04, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▎ | 2091/2500 [2:44:21<30:34, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 84%|████████▎ | 2092/2500 [2:44:26<31:18, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▎ | 2093/2500 [2:44:31<33:48, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2094/2500 [2:44:35<30:41, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2095/2500 [2:44:39<28:36, 4.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2096/2500 [2:44:43<29:57, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2097/2500 [2:44:47<28:20, 4.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2098/2500 [2:44:50<26:27, 3.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2099/2500 [2:44:55<28:19, 4.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", 
"Evaluating workflow: 84%|████████▍ | 2100/2500 [2:45:00<28:40, 4.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2101/2500 [2:45:03<26:14, 3.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2102/2500 [2:45:09<29:33, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2103/2500 [2:45:12<27:48, 4.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2104/2500 [2:45:16<27:06, 4.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2105/2500 [2:45:20<27:00, 4.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2106/2500 [2:45:23<25:16, 3.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2107/2500 [2:45:28<26:06, 3.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
84%|████████▍ | 2108/2500 [2:45:32<26:37, 4.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2109/2500 [2:45:36<25:45, 3.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2110/2500 [2:45:39<24:52, 3.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2111/2500 [2:45:44<26:10, 4.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2112/2500 [2:45:47<24:24, 3.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2113/2500 [2:45:51<25:53, 4.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2114/2500 [2:45:57<28:17, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2115/2500 [2:46:03<30:51, 4.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2116/2500 
[2:46:06<28:45, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2117/2500 [2:46:13<31:57, 5.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2118/2500 [2:46:17<31:24, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2119/2500 [2:46:23<32:30, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2120/2500 [2:46:30<36:17, 5.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2121/2500 [2:46:33<31:59, 5.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2122/2500 [2:46:39<32:15, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2123/2500 [2:46:42<29:30, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2124/2500 [2:46:46<28:06, 4.49s/it]" ] 
}, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2125/2500 [2:46:50<26:47, 4.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2126/2500 [2:46:55<27:48, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2127/2500 [2:46:59<26:37, 4.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2128/2500 [2:47:03<25:58, 4.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2129/2500 [2:47:07<26:26, 4.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2130/2500 [2:47:12<27:46, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2131/2500 [2:47:17<27:14, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2132/2500 [2:47:21<27:04, 4.41s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2133/2500 [2:47:26<28:08, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2134/2500 [2:47:29<25:15, 4.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2135/2500 [2:47:33<24:29, 4.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2136/2500 [2:47:38<27:04, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2137/2500 [2:47:43<26:37, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2138/2500 [2:47:47<25:39, 4.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2139/2500 [2:47:51<25:38, 4.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2140/2500 [2:47:55<25:13, 4.20s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2141/2500 [2:48:00<26:46, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2142/2500 [2:48:05<26:35, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2143/2500 [2:48:10<29:11, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2144/2500 [2:48:15<28:14, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2145/2500 [2:48:20<28:39, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2146/2500 [2:48:23<26:06, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2147/2500 [2:48:28<25:45, 4.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2148/2500 [2:48:32<24:53, 4.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 
'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2149/2500 [2:48:36<24:25, 4.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2150/2500 [2:48:41<27:16, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2151/2500 [2:48:46<26:51, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2152/2500 [2:48:50<26:01, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2153/2500 [2:48:54<25:28, 4.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2154/2500 [2:48:58<24:59, 4.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2155/2500 [2:49:03<25:18, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2156/2500 [2:49:07<23:39, 4.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▋ | 2157/2500 [2:49:12<26:05, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▋ | 2158/2500 [2:49:16<25:03, 4.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▋ | 2159/2500 [2:49:21<25:46, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▋ | 2160/2500 [2:49:27<27:59, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▋ | 2161/2500 [2:49:33<29:56, 5.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▋ | 2162/2500 [2:49:40<32:13, 5.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2163/2500 [2:49:45<31:08, 5.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2164/2500 [2:49:49<28:50, 5.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2165/2500 [2:49:55<30:29, 5.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2166/2500 [2:50:00<29:58, 5.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2167/2500 [2:50:05<28:44, 5.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2168/2500 [2:50:11<30:19, 5.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2169/2500 [2:50:17<30:18, 5.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2170/2500 [2:50:22<28:49, 5.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2171/2500 [2:50:28<31:00, 5.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2172/2500 [2:50:33<29:59, 5.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2173/2500 [2:50:38<28:16, 5.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2174/2500 [2:50:43<28:34, 5.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2175/2500 [2:50:49<29:21, 5.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2176/2500 [2:50:56<32:09, 5.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2177/2500 [2:51:00<29:24, 5.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2178/2500 [2:51:07<31:03, 5.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2179/2500 [2:51:13<30:47, 5.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2180/2500 [2:51:18<29:57, 5.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating 
workflow: 87%|████████▋ | 2181/2500 [2:51:22<28:00, 5.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2182/2500 [2:51:29<29:26, 5.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2183/2500 [2:51:33<28:07, 5.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2184/2500 [2:51:40<30:11, 5.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2185/2500 [2:51:46<30:01, 5.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2186/2500 [2:51:51<28:49, 5.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2187/2500 [2:51:57<29:48, 5.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2188/2500 [2:52:01<27:18, 5.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 
2189/2500 [2:52:06<26:15, 5.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2190/2500 [2:52:10<25:26, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2191/2500 [2:52:15<24:09, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2192/2500 [2:52:19<23:54, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2193/2500 [2:52:25<25:49, 5.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2194/2500 [2:52:30<26:13, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2195/2500 [2:52:35<24:54, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2196/2500 [2:52:39<23:54, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2197/2500 [2:52:44<24:22, 
4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2198/2500 [2:52:48<23:10, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2199/2500 [2:52:55<25:59, 5.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2200/2500 [2:53:00<26:08, 5.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2201/2500 [2:53:07<27:55, 5.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2202/2500 [2:53:11<26:39, 5.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2203/2500 [2:53:16<25:12, 5.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2204/2500 [2:53:23<28:08, 5.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2205/2500 [2:53:28<26:18, 5.35s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2206/2500 [2:53:32<25:16, 5.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2207/2500 [2:53:37<24:50, 5.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2208/2500 [2:53:41<22:55, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2209/2500 [2:53:45<21:19, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2210/2500 [2:53:49<21:25, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2211/2500 [2:53:56<24:17, 5.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2212/2500 [2:53:59<22:09, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▊ | 2213/2500 [2:54:05<23:07, 4.84s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▊ | 2214/2500 [2:54:08<21:22, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▊ | 2215/2500 [2:54:15<24:10, 5.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▊ | 2216/2500 [2:54:21<25:53, 5.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▊ | 2217/2500 [2:54:26<25:29, 5.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▊ | 2218/2500 [2:54:31<23:53, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2219/2500 [2:54:35<23:11, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2220/2500 [2:54:40<22:49, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2221/2500 [2:54:44<21:45, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics 
{'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2222/2500 [2:54:49<21:15, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2223/2500 [2:54:53<20:59, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2224/2500 [2:54:57<19:25, 4.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2225/2500 [2:55:01<19:45, 4.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2226/2500 [2:55:07<22:03, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2227/2500 [2:55:13<23:55, 5.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2228/2500 [2:55:21<26:22, 5.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2229/2500 [2:55:25<23:59, 5.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2230/2500 [2:55:29<23:03, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2231/2500 [2:55:35<23:12, 5.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2232/2500 [2:55:40<23:13, 5.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2233/2500 [2:55:46<24:10, 5.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2234/2500 [2:55:51<23:22, 5.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2235/2500 [2:55:56<22:36, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2236/2500 [2:55:59<20:21, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2237/2500 [2:56:03<19:27, 4.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": 
"stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2238/2500 [2:56:09<20:56, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2239/2500 [2:56:14<21:40, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2240/2500 [2:56:19<21:47, 5.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2241/2500 [2:56:23<20:22, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2242/2500 [2:56:28<19:50, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2243/2500 [2:56:32<19:23, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2244/2500 [2:56:37<19:45, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2245/2500 [2:56:41<19:14, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2246/2500 [2:56:46<19:17, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2247/2500 [2:56:51<20:07, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2248/2500 [2:56:56<20:00, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2249/2500 [2:57:00<19:56, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2250/2500 [2:57:05<20:08, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2251/2500 [2:57:10<19:35, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2252/2500 [2:57:16<21:12, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2253/2500 [2:57:21<20:33, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", 
"Evaluating workflow: 90%|█████████ | 2254/2500 [2:57:25<19:05, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2255/2500 [2:57:29<18:39, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2256/2500 [2:57:34<19:42, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2257/2500 [2:57:40<20:20, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2258/2500 [2:57:45<20:30, 5.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2259/2500 [2:57:48<18:00, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2260/2500 [2:57:52<17:18, 4.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2261/2500 [2:57:55<16:05, 4.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
90%|█████████ | 2262/2500 [2:58:00<16:07, 4.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2263/2500 [2:58:04<15:51, 4.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2264/2500 [2:58:07<15:36, 3.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2265/2500 [2:58:11<15:33, 3.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2266/2500 [2:58:17<17:37, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2267/2500 [2:58:23<19:08, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2268/2500 [2:58:27<18:16, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2269/2500 [2:58:33<19:40, 5.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2270/2500 
[2:58:37<18:02, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2271/2500 [2:58:44<20:11, 5.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2272/2500 [2:58:47<17:52, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2273/2500 [2:58:51<17:10, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2274/2500 [2:58:56<16:56, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2275/2500 [2:59:01<18:09, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2276/2500 [2:59:06<18:01, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2277/2500 [2:59:11<18:12, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2278/2500 [2:59:17<18:42, 5.06s/it]" ] 
}, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2279/2500 [2:59:20<16:51, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2280/2500 [2:59:24<15:46, 4.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2281/2500 [2:59:28<15:26, 4.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████▏| 2282/2500 [2:59:32<15:00, 4.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████▏| 2283/2500 [2:59:36<15:36, 4.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████▏| 2284/2500 [2:59:40<14:53, 4.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████▏| 2285/2500 [2:59:44<14:59, 4.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████▏| 2286/2500 [2:59:48<14:14, 3.99s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████▏| 2287/2500 [2:59:52<14:36, 4.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2288/2500 [2:59:56<14:17, 4.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2289/2500 [3:00:01<15:24, 4.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2290/2500 [3:00:06<15:59, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2291/2500 [3:00:12<17:29, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2292/2500 [3:00:16<16:08, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2293/2500 [3:00:21<15:44, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2294/2500 [3:00:24<14:53, 4.34s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2295/2500 [3:00:29<15:14, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2296/2500 [3:00:33<14:37, 4.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2297/2500 [3:00:37<14:17, 4.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2298/2500 [3:00:44<16:23, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2299/2500 [3:00:48<16:06, 4.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2300/2500 [3:00:54<16:43, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2301/2500 [3:00:58<15:48, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2302/2500 [3:01:01<14:16, 4.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 
'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2303/2500 [3:01:06<14:48, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2304/2500 [3:01:11<14:42, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2305/2500 [3:01:15<14:16, 4.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2306/2500 [3:01:19<14:17, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2307/2500 [3:01:24<14:19, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2308/2500 [3:01:28<13:42, 4.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2309/2500 [3:01:32<13:29, 4.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2310/2500 [3:01:37<14:21, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, 
{ "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2311/2500 [3:01:41<13:37, 4.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2312/2500 [3:01:45<13:34, 4.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2313/2500 [3:01:49<12:51, 4.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2314/2500 [3:01:53<12:47, 4.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2315/2500 [3:02:00<15:05, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2316/2500 [3:02:03<13:50, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2317/2500 [3:02:07<13:06, 4.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2318/2500 [3:02:12<13:17, 4.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2319/2500 [3:02:16<13:05, 4.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2320/2500 [3:02:19<12:13, 4.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2321/2500 [3:02:23<11:38, 3.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2322/2500 [3:02:28<13:07, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2323/2500 [3:02:33<13:16, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2324/2500 [3:02:37<12:48, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2325/2500 [3:02:40<11:45, 4.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2326/2500 [3:02:46<13:03, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2327/2500 [3:02:50<12:04, 4.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2328/2500 [3:02:55<12:54, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2329/2500 [3:03:00<13:35, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2330/2500 [3:03:07<15:05, 5.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2331/2500 [3:03:11<14:25, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2332/2500 [3:03:15<13:16, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2333/2500 [3:03:20<12:48, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2334/2500 [3:03:26<14:02, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating 
workflow: 93%|█████████▎| 2335/2500 [3:03:31<14:14, 5.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2336/2500 [3:03:36<13:30, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2337/2500 [3:03:40<12:47, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▎| 2338/2500 [3:03:44<12:44, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▎| 2339/2500 [3:03:48<12:04, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▎| 2340/2500 [3:03:53<11:48, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▎| 2341/2500 [3:03:57<11:16, 4.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▎| 2342/2500 [3:04:02<12:28, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▎| 
2343/2500 [3:04:08<12:41, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2344/2500 [3:04:13<12:44, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2345/2500 [3:04:19<13:31, 5.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2346/2500 [3:04:22<12:13, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2347/2500 [3:04:28<12:37, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2348/2500 [3:04:35<14:42, 5.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2349/2500 [3:04:40<13:39, 5.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2350/2500 [3:04:46<14:18, 5.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2351/2500 [3:04:52<14:09, 
5.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2352/2500 [3:04:58<14:08, 5.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2353/2500 [3:05:02<13:05, 5.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2354/2500 [3:05:08<13:01, 5.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2355/2500 [3:05:11<11:49, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2356/2500 [3:05:17<12:19, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2357/2500 [3:05:22<12:15, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2358/2500 [3:05:27<12:01, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2359/2500 [3:05:32<11:29, 4.89s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2360/2500 [3:05:36<10:58, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2361/2500 [3:05:42<11:51, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2362/2500 [3:05:45<10:29, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2363/2500 [3:05:49<10:04, 4.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2364/2500 [3:05:55<10:40, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2365/2500 [3:05:59<10:10, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2366/2500 [3:06:05<10:54, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2367/2500 [3:06:09<10:19, 4.66s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2368/2500 [3:06:13<09:53, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2369/2500 [3:06:18<10:25, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2370/2500 [3:06:23<10:19, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2371/2500 [3:06:28<10:29, 4.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2372/2500 [3:06:33<10:09, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2373/2500 [3:06:38<10:27, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2374/2500 [3:06:42<09:55, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2375/2500 [3:06:46<09:32, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2376/2500 [3:06:51<09:44, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2377/2500 [3:06:56<09:48, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2378/2500 [3:07:03<10:46, 5.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2379/2500 [3:07:09<11:08, 5.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2380/2500 [3:07:15<11:17, 5.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2381/2500 [3:07:23<12:40, 6.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2382/2500 [3:07:30<12:48, 6.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2383/2500 [3:07:33<10:54, 5.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 
0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2384/2500 [3:07:37<09:38, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2385/2500 [3:07:41<08:53, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2386/2500 [3:07:44<08:17, 4.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2387/2500 [3:07:48<07:53, 4.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2388/2500 [3:07:52<07:22, 3.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2389/2500 [3:07:56<07:34, 4.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2390/2500 [3:08:01<08:10, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2391/2500 [3:08:07<08:53, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, 
{ "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2392/2500 [3:08:12<08:52, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2393/2500 [3:08:16<08:01, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2394/2500 [3:08:20<07:37, 4.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2395/2500 [3:08:27<09:16, 5.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2396/2500 [3:08:32<09:01, 5.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2397/2500 [3:08:38<09:20, 5.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2398/2500 [3:08:43<08:58, 5.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2399/2500 [3:08:47<08:20, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2400/2500 [3:08:52<08:16, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2401/2500 [3:08:59<08:51, 5.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2402/2500 [3:09:03<08:27, 5.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2403/2500 [3:09:06<07:18, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2404/2500 [3:09:09<06:33, 4.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2405/2500 [3:09:14<06:40, 4.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2406/2500 [3:09:17<06:16, 4.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▋| 2407/2500 [3:09:23<06:41, 4.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 96%|█████████▋| 2408/2500 [3:09:29<07:49, 5.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▋| 2409/2500 [3:09:34<07:19, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▋| 2410/2500 [3:09:38<07:10, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▋| 2411/2500 [3:09:42<06:32, 4.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▋| 2412/2500 [3:09:48<07:25, 5.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2413/2500 [3:09:53<07:01, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2414/2500 [3:09:58<07:01, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2415/2500 [3:10:02<06:31, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating 
workflow: 97%|█████████▋| 2416/2500 [3:10:07<06:44, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2417/2500 [3:10:12<06:43, 4.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2418/2500 [3:10:17<06:44, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2419/2500 [3:10:22<06:30, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2420/2500 [3:10:28<06:50, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2421/2500 [3:10:34<07:12, 5.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2422/2500 [3:10:38<06:39, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2423/2500 [3:10:42<06:09, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 
2424/2500 [3:10:48<06:26, 5.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2425/2500 [3:10:52<05:59, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2426/2500 [3:10:57<05:49, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2427/2500 [3:11:02<05:56, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2428/2500 [3:11:08<06:21, 5.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2429/2500 [3:11:12<05:54, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2430/2500 [3:11:16<05:26, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2431/2500 [3:11:21<05:20, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2432/2500 [3:11:24<04:44, 
4.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2433/2500 [3:11:29<04:54, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2434/2500 [3:11:32<04:31, 4.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2435/2500 [3:11:39<05:14, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2436/2500 [3:11:45<05:30, 5.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2437/2500 [3:11:49<05:16, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2438/2500 [3:11:55<05:21, 5.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2439/2500 [3:12:00<05:07, 5.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2440/2500 [3:12:07<05:40, 5.68s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2441/2500 [3:12:11<05:03, 5.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2442/2500 [3:12:15<04:48, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2443/2500 [3:12:21<04:53, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2444/2500 [3:12:25<04:30, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2445/2500 [3:12:28<04:00, 4.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2446/2500 [3:12:33<03:53, 4.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2447/2500 [3:12:36<03:33, 4.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2448/2500 [3:12:40<03:33, 4.11s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2449/2500 [3:12:45<03:40, 4.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2450/2500 [3:12:48<03:22, 4.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2451/2500 [3:12:54<03:41, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2452/2500 [3:13:00<03:52, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2453/2500 [3:13:04<03:35, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2454/2500 [3:13:08<03:22, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2455/2500 [3:13:11<03:11, 4.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2456/2500 [3:13:16<03:10, 4.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2457/2500 [3:13:19<02:54, 4.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2458/2500 [3:13:23<02:45, 3.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2459/2500 [3:13:29<03:09, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2460/2500 [3:13:36<03:25, 5.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2461/2500 [3:13:41<03:17, 5.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2462/2500 [3:13:45<03:00, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▊| 2463/2500 [3:13:49<02:55, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▊| 2464/2500 [3:13:53<02:39, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 
'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▊| 2465/2500 [3:13:57<02:33, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▊| 2466/2500 [3:14:00<02:14, 3.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▊| 2467/2500 [3:14:05<02:21, 4.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▊| 2468/2500 [3:14:09<02:16, 4.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2469/2500 [3:14:16<02:33, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2470/2500 [3:14:21<02:27, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2471/2500 [3:14:24<02:11, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2472/2500 [3:14:29<02:08, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" 
] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2473/2500 [3:14:34<02:08, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2474/2500 [3:14:39<01:59, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2475/2500 [3:14:43<01:56, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2476/2500 [3:14:47<01:42, 4.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2477/2500 [3:14:54<01:56, 5.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2478/2500 [3:14:57<01:41, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2479/2500 [3:15:01<01:34, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2480/2500 [3:15:09<01:46, 5.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2481/2500 [3:15:13<01:37, 5.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2482/2500 [3:15:17<01:25, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2483/2500 [3:15:21<01:16, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2484/2500 [3:15:26<01:11, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2485/2500 [3:15:31<01:10, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2486/2500 [3:15:36<01:06, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2487/2500 [3:15:39<00:55, 4.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2488/2500 [3:15:44<00:52, 4.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2489/2500 [3:15:49<00:51, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2490/2500 [3:15:54<00:48, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2491/2500 [3:16:01<00:49, 5.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2492/2500 [3:16:06<00:42, 5.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2493/2500 [3:16:12<00:38, 5.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2494/2500 [3:16:18<00:32, 5.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2495/2500 [3:16:21<00:24, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2496/2500 [3:16:25<00:17, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", 
"Evaluating workflow: 100%|█████████▉| 2497/2500 [3:16:29<00:12, 4.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2498/2500 [3:16:34<00:09, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2499/2500 [3:16:38<00:04, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 2500/2500 [3:16:44<00:00, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "Evaluation metrics: {'f1': 0.2408, 'em': 0.2408, 'acc': 0.9112}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "\n", "optimizer.evaluator.dataname = 'hotpotqa'\n", "optimizer.optimize(dataset=benchmark,provided_scorer=True)\n", "optimizer.restore_best_graph()\n", "optimizer.save(\"./debug/save_30_noreason.json\")\n", "\n", "# evaluate the optimized SEW workflow\n", "\n", "optimizer.evaluator.dataname = 'hotpotqa'\n", "with suppress_logger_info():\n", " metrics = optimizer.evaluate(dataset=benchmark, eval_mode=\"test\")\n", "print(\"Evaluation metrics: \", metrics)" ] }, { "cell_type": "code", "execution_count": 18, "id": "491d1969", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-01 17:07:57.192\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36mrestore_best_graph\u001b[0m:\u001b[36m1211\u001b[0m - \u001b[1mRestore the best graph from snapshot with metrics {'f1': 0.0, 'em': 0.0, 'acc': 0.98} ...\u001b[0m\n" ] } ], 
"source": [ "optimizer.restore_best_graph()" ] }, { "cell_type": "code", "execution_count": 33, "id": "31106952", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'f1': 0.2408, 'em': 0.2408, 'acc': 0.9112}" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "metrics" ] }, { "cell_type": "code", "execution_count": 20, "id": "9e7d33f4", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "SequentialWorkFlowGraph(class_name='SequentialWorkFlowGraph', goal='Provide a concise answer to the question using relevant context. The answer must be straightforward and avoid unnecessary explanations.', nodes=[WorkFlowNode(class_name='WorkFlowNode', name='generate_answer', description='Extract and formulate an answer from the given context.', inputs=[Parameter(class_name='Parameter', name='question', type='str', description='The question that needs to be answered.', required=True)], outputs=[Parameter(class_name='Parameter', name='answer', type='str', description='The direct answer to the question.', required=True)], reason=None, agents=[{'name': 'GenerateAnswerAgent', 'description': 'Extract and formulate an answer from the given context.', 'prompt': '\"\"\"\\nUse the context provided in `{question}` to determine the best answer. Validate the answer for accuracy against relevant criteria before finalization. Present the final answer in a clear and consistent format, such as a single sentence or short phrase. If the answer is ambiguous or unclear, indicate this explicitly. Address any complexities or nuances appropriately while avoiding unnecessary commentary or reasoning.\\n\"\"\"', 'prompt_template': StringTemplate(class_name='StringTemplate', instruction='Use the context to determine the best answer to the question. 
Provide your final answer in a clear format, without extra commentary or reasoning.', context=None, constraints=None, tools=None, demonstrations=None, history=None), 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'output_parser': None, 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}], action_graph=None, status=), WorkFlowNode(class_name='WorkFlowNode', name='handle_format_errors5318', description='Task to handle_format_errors5318. Takes validated_answer as input. Produces final_answer as output.', inputs=[Parameter(class_name='Parameter', name='validated_answer', type='str', description='Input parameter validated_answer for handle_format_errors5318', required=False)], outputs=[Parameter(class_name='Parameter', name='final_answer', type='str', description='Output parameter final_answer from handle_format_errors5318', required=True)], reason=None, agents=[{'name': 'HandleFormatErrors5318Agent', 'description': 'Task to handle_format_errors5318. Takes validated_answer as input. Produces final_answer as output.', 'prompt': '```xml\\n\"\"\"\\nThink step by step to answer the question based on the context provided in {question}. Clearly define what constitutes a \"significant change\" in gene expression based on the context. If the perturbation does not lead to a significant change, explicitly state that in your response. Additionally, if there are ambiguous elements or discrepancies between predictions and ground-truth solutions, identify those aspects and adjust the answer accordingly. Ensure that your response is clear and concise. 
Format your output in xml format, such as {thought} and {answer}.\\n\"\"\"\\n```', 'prompt_template': StringTemplate(class_name='StringTemplate', instruction=\"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\", context=None, constraints=None, tools=None, demonstrations=None, history=None), 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'inputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Input parameter validated_answer for handle_format_errors5318', 'required': False}], 'outputs': [{'name': 'final_answer', 'type': 'str', 'description': 'Output parameter final_answer from handle_format_errors5318', 'required': True}], 'output_parser': None, 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}], action_graph=None, status=), WorkFlowNode(class_name='WorkFlowNode', name='validate_answer4773', description='Task to validate_answer4773. Takes answer as input. Produces validated_answer as output.', inputs=[Parameter(class_name='Parameter', name='answer', type='str', description='Input parameter answer for validate_answer4773', required=False)], outputs=[Parameter(class_name='Parameter', name='validated_answer', type='str', description='Output parameter validated_answer from validate_answer4773', required=True)], reason=None, agents=[{'name': 'ValidateAnswer4773Agent', 'description': 'Task to validate_answer4773. Takes answer as input. Produces validated_answer as output.', 'prompt': '``` \\nINSTRUCTION for the 3-th task:\\n\"\"\"\\nAnalyze the provided {question} thoroughly to generate a relevant and accurate answer. Clearly define what constitutes a \"significant change\" based on the context provided. 
In the \\'thought\\' field, detail your reasoning process, addressing any potential ambiguities, conflicts, or uncertainties that may arise in the answer. If there are conflicting predictions or solutions, clarify how you resolved them and prioritize the evidence used in your reasoning. Validate the accuracy of your answer against known ground-truth solutions before finalizing it. In the \\'answer\\' field, provide the final response, ensuring it adheres to the expected format. If the answer does not conform to the expected format, clearly indicate the issue. Format your output in XML format, such as {thought} and {answer}.\\n\"\"\"\\n```', 'prompt_template': StringTemplate(class_name='StringTemplate', instruction=\"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\", context=None, constraints=None, tools=None, demonstrations=None, history=None), 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'inputs': [{'name': 'answer', 'type': 'str', 'description': 'Input parameter answer for validate_answer4773', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_answer4773', 'required': True}], 'output_parser': None, 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None}], action_graph=None, status=)], edges=[WorkFlowEdge(class_name='WorkFlowEdge', source='generate_answer', target='validate_answer4773', priority=0), WorkFlowEdge(class_name='WorkFlowEdge', source='validate_answer4773', target='handle_format_errors5318', priority=0)], graph=)" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "optimizer.graph" ] }, { "cell_type": "code", "execution_count": 21, 
"id": "3386cab6", "metadata": {}, "outputs": [], "source": [ "# optimizer.save(\"./debug/agent_check_ourloop.json\")" ] }, { "cell_type": "code", "execution_count": 16, "id": "7088f101", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "\u001b[32m2025-12-28 15:25:07.156\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.benchmark.pertqa\u001b[0m:\u001b[36m_load_data_from_file\u001b[0m:\u001b[36m52\u001b[0m - \u001b[1mloading HotPotQA data from /home/tl688/pitl688/selfevolve/EvoAgentX/examples/pertqa/adamson_train.json ...\u001b[0m\n", "\u001b[32m2025-12-28 15:25:07.157\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.benchmark.pertqa\u001b[0m:\u001b[36m_load_data_from_file\u001b[0m:\u001b[36m52\u001b[0m - \u001b[1mloading HotPotQA data from /home/tl688/pitl688/selfevolve/EvoAgentX/examples/pertqa/adamson_train.json ...\u001b[0m\n", "\u001b[32m2025-12-28 15:25:07.158\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.benchmark.pertqa\u001b[0m:\u001b[36m_load_data_from_file\u001b[0m:\u001b[36m52\u001b[0m - \u001b[1mloading HotPotQA data from /home/tl688/pitl688/selfevolve/EvoAgentX/examples/pertqa/adamson_test.json ...\u001b[0m\n", "\u001b[32m2025-12-28 15:25:07.172\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.agents.customize_agent\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m92\u001b[0m - \u001b[33m\u001b[1mBoth `prompt` and `prompt_template` are provided in `CustomizeAgent`. `prompt_template` will be used.\u001b[0m\n", "\u001b[32m2025-12-28 15:25:07.174\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.agents.customize_agent\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m92\u001b[0m - \u001b[33m\u001b[1mBoth `prompt` and `prompt_template` are provided in `CustomizeAgent`. 
`prompt_template` will be used.\u001b[0m\n", "\u001b[32m2025-12-28 15:25:07.177\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.agents.customize_agent\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m92\u001b[0m - \u001b[33m\u001b[1mBoth `prompt` and `prompt_template` are provided in `CustomizeAgent`. `prompt_template` will be used.\u001b[0m\n", "\u001b[32m2025-12-28 15:25:07.180\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.agents.customize_agent\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m92\u001b[0m - \u001b[33m\u001b[1mBoth `prompt` and `prompt_template` are provided in `CustomizeAgent`. `prompt_template` will be used.\u001b[0m\n", "\u001b[32m2025-12-28 15:25:07.182\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.agents.customize_agent\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m92\u001b[0m - \u001b[33m\u001b[1mBoth `prompt` and `prompt_template` are provided in `CustomizeAgent`. `prompt_template` will be used.\u001b[0m\n", "\u001b[32m2025-12-28 15:25:07.184\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.agents.customize_agent\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m92\u001b[0m - \u001b[33m\u001b[1mBoth `prompt` and `prompt_template` are provided in `CustomizeAgent`. 
`prompt_template` will be used.\u001b[0m\n" ] } ], "source": [ "from evoagentx.optimizers import QASTRUCTUREOptimizer, TextGradOptimizer\n", "\n", "# NOTE(review): this cell relies on hidden state. It needs an `optimizer` object left over from an\n", "# earlier cell, and ./debug/agent_check_ourloop.json must already exist on disk (the\n", "# `optimizer.save(...)` call in the previous cell is commented out). Confirm both before Run All.\n", "benchmark = PertQA()\n", "graphinfo = optimizer.load_module(\"./debug/agent_check_ourloop.json\")\n", "sew_graph = SequentialWorkFlowGraph.from_dict(graphinfo)\n", "\n", "# Populate the agent manager from the reloaded workflow graph.\n", "agent_manager = AgentManager(tools=[search_toolkit, wiki_toolkit, arxiv_toolkit])\n", "agent_manager.add_agents_from_workflow(sew_graph, llm_config=llm_config)\n", "\n", "# Build the evaluator once (it was previously constructed twice with identical arguments).\n", "evaluator = Evaluator(llm=llm, agent_manager=agent_manager, collate_func=collate_func, num_workers=20, verbose=True)\n", "\n", "# Rebind `optimizer` to a QASTRUCTUREOptimizer over the reloaded workflow graph.\n", "optimizer = QASTRUCTUREOptimizer(\n", "    graph=sew_graph,\n", "    evaluator=evaluator,\n", "    llm=llm,\n", "    max_steps=5,\n", "    eval_rounds=1,\n", "    repr_scheme=\"python\",\n", "    optimize_mode=\"all\",\n", "    order=\"zero-order\",\n", "    max_rounds=1\n", ")" ] }, { "cell_type": "code", "execution_count": 18, "id": "39fc99bd", "metadata": { "scrolled": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 0%| | 1/2500 [00:12<8:23:59, 12.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Task exception was never retrieved\n", "future: exception=RuntimeError('Event loop is closed')>\n", "Traceback (most recent call last):\n", " File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/asyncio/tasks.py\", line 277, in __step\n", " result = coro.send(None)\n", " ^^^^^^^^^^^^^^^\n", " File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/site-packages/litellm/utils.py\", line 873, in _client_async_logging_helper\n", " GLOBAL_LOGGING_WORKER.ensure_initialized_and_enqueue(\n", " File
\"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/site-packages/litellm/litellm_core_utils/logging_worker.py\", line 322, in ensure_initialized_and_enqueue\n", " self.enqueue(async_coroutine)\n", " File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/site-packages/litellm/litellm_core_utils/logging_worker.py\", line 131, in enqueue\n", " self._queue.put_nowait(task)\n", " File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/asyncio/queues.py\", line 147, in put_nowait\n", " self._wakeup_next(self._getters)\n", " File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/asyncio/queues.py\", line 63, in _wakeup_next\n", " waiter.set_result(None)\n", " File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/asyncio/futures.py\", line 263, in set_result\n", " self.__schedule_callbacks()\n", " File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/asyncio/futures.py\", line 173, in __schedule_callbacks\n", " self._loop.call_soon(callback, self, context=ctx)\n", " File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/asyncio/base_events.py\", line 762, in call_soon\n", " self._check_closed()\n", " File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/asyncio/base_events.py\", line 520, in _check_closed\n", " raise RuntimeError('Event loop is closed')\n", "RuntimeError: Event loop is closed\n", "Evaluating workflow: 0%| | 2/2500 [00:24<8:27:39, 12.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 0%| | 3/2500 [00:36<8:27:45, 12.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 0%| | 4/2500 [00:49<8:44:05, 12.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 0%| | 5/2500 [01:02<8:51:29, 12.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 0%| | 6/2500 [01:15<8:51:15, 12.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 0%| | 7/2500 [01:28<8:52:13, 12.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 0%| | 8/2500 [01:40<8:44:58, 12.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 0%| | 9/2500 [01:53<8:44:36, 12.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 0%| | 10/2500 [02:06<8:49:31, 12.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 0%| | 11/2500 [02:19<8:52:12, 12.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 0%| | 12/2500 [02:31<8:36:43, 12.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating 
workflow: 1%| | 13/2500 [02:44<8:43:27, 12.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 14/2500 [02:55<8:29:15, 12.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 15/2500 [03:08<8:34:36, 12.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 16/2500 [03:19<8:24:27, 12.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 17/2500 [03:33<8:38:46, 12.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 18/2500 [03:47<8:56:37, 12.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 19/2500 [03:59<8:46:16, 12.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 20/2500 [04:12<8:51:58, 12.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 21/2500 [04:24<8:38:40, 12.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 22/2500 [04:36<8:27:09, 12.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 23/2500 [04:48<8:31:16, 12.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 24/2500 [05:01<8:30:15, 12.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 25/2500 [05:14<8:37:30, 12.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 26/2500 [05:27<8:43:52, 12.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 27/2500 [05:40<8:48:40, 12.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 28/2500 [05:54<9:06:37, 13.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 29/2500 [06:06<8:49:35, 12.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", 
"Evaluating workflow: 1%| | 30/2500 [06:14<7:50:06, 11.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 31/2500 [06:23<7:16:40, 10.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%|▏ | 32/2500 [06:31<6:51:26, 10.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%|▏ | 33/2500 [06:41<6:43:09, 9.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%|▏ | 34/2500 [06:49<6:23:41, 9.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%|▏ | 35/2500 [06:59<6:32:48, 9.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%|▏ | 36/2500 [07:08<6:25:06, 9.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%|▏ | 37/2500 [07:17<6:18:46, 9.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 38/2500 [07:25<6:09:32, 9.01s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 39/2500 [07:34<6:03:20, 8.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 40/2500 [07:43<6:09:25, 9.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 41/2500 [07:52<6:06:50, 8.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 42/2500 [08:02<6:13:44, 9.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 43/2500 [08:10<6:02:23, 8.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 44/2500 [08:19<6:05:32, 8.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 45/2500 [08:28<6:05:58, 8.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 46/2500 [08:36<6:02:05, 8.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 2%|▏ | 47/2500 [08:45<6:01:13, 8.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 48/2500 [08:54<6:03:59, 8.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 49/2500 [09:02<5:51:36, 8.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 50/2500 [09:11<5:53:59, 8.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 51/2500 [09:20<5:53:35, 8.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 52/2500 [09:29<5:56:23, 8.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 53/2500 [09:38<6:03:57, 8.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 54/2500 [09:47<5:59:49, 8.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 55/2500 [09:58<6:30:40, 9.59s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 56/2500 [10:06<6:18:01, 9.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 57/2500 [10:15<6:06:03, 8.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 58/2500 [10:24<6:05:40, 8.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 59/2500 [10:34<6:22:01, 9.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 60/2500 [10:42<6:07:43, 9.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 61/2500 [10:52<6:12:10, 9.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 62/2500 [11:01<6:18:50, 9.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 63/2500 [11:11<6:20:09, 9.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 3%|▎ | 64/2500 [11:19<6:08:36, 9.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 65/2500 [11:29<6:18:40, 9.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 66/2500 [11:39<6:21:36, 9.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 67/2500 [11:47<6:08:28, 9.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 68/2500 [11:56<6:08:50, 9.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 69/2500 [12:05<6:01:08, 8.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 70/2500 [12:14<6:06:08, 9.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 71/2500 [12:23<6:08:16, 9.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 72/2500 [12:33<6:17:30, 9.33s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 73/2500 [12:42<6:07:09, 9.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 74/2500 [12:50<5:57:55, 8.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 75/2500 [12:59<5:55:37, 8.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 76/2500 [13:07<5:52:41, 8.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 77/2500 [13:16<5:55:12, 8.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 78/2500 [13:27<6:15:07, 9.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 79/2500 [13:35<6:08:17, 9.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 80/2500 [13:45<6:09:55, 9.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 81/2500 [13:53<6:04:17, 9.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 82/2500 [14:02<5:55:48, 8.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 83/2500 [14:11<6:00:21, 8.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 84/2500 [14:20<5:56:22, 8.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 85/2500 [14:29<5:57:09, 8.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 86/2500 [14:39<6:14:24, 9.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 87/2500 [14:48<6:07:29, 9.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▎ | 88/2500 [14:56<5:57:20, 8.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▎ | 89/2500 [15:05<6:02:20, 9.02s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▎ | 90/2500 [15:15<6:06:25, 9.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▎ | 91/2500 [15:24<6:04:23, 9.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▎ | 92/2500 [15:32<5:59:30, 8.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▎ | 93/2500 [15:42<6:06:05, 9.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 94/2500 [15:53<6:25:46, 9.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 95/2500 [16:03<6:30:42, 9.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 96/2500 [16:11<6:17:31, 9.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 97/2500 [16:20<6:12:21, 9.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 98/2500 [16:29<6:00:58, 9.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 99/2500 [16:38<6:01:08, 9.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 100/2500 [16:47<6:04:13, 9.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 101/2500 [16:57<6:16:27, 9.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 102/2500 [17:05<6:01:43, 9.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 103/2500 [17:15<6:13:48, 9.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 104/2500 [17:25<6:17:48, 9.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 105/2500 [17:34<6:08:26, 9.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 106/2500 [17:42<6:00:38, 9.04s/it]" ] }, 
{ "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 107/2500 [17:52<6:03:50, 9.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 108/2500 [18:00<5:56:16, 8.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 109/2500 [18:10<6:06:10, 9.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 110/2500 [18:19<5:59:00, 9.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 111/2500 [18:27<5:51:59, 8.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 112/2500 [18:36<5:56:16, 8.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▍ | 113/2500 [18:45<5:53:34, 8.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▍ | 114/2500 [18:55<6:04:44, 9.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▍ | 115/2500 [19:04<6:00:45, 9.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▍ | 116/2500 [19:13<6:04:39, 9.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▍ | 117/2500 [19:22<6:05:31, 9.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▍ | 118/2500 [19:31<6:03:01, 9.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▍ | 119/2500 [19:39<5:50:22, 8.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▍ | 120/2500 [19:48<5:50:25, 8.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▍ | 121/2500 [19:57<5:53:22, 8.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▍ | 122/2500 [20:06<5:50:03, 8.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▍ | 123/2500 
[20:16<5:59:16, 9.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▍ | 124/2500 [20:25<5:59:50, 9.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 125/2500 [20:33<5:49:55, 8.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 126/2500 [20:42<5:46:31, 8.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 127/2500 [20:50<5:44:31, 8.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 128/2500 [20:59<5:50:48, 8.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 129/2500 [21:07<5:41:10, 8.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 130/2500 [21:17<5:53:45, 8.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 131/2500 [21:26<5:49:22, 8.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 
'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 132/2500 [21:34<5:39:06, 8.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 133/2500 [21:42<5:36:04, 8.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 134/2500 [21:58<7:02:17, 10.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 135/2500 [22:07<6:43:54, 10.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 136/2500 [22:16<6:28:38, 9.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 137/2500 [22:26<6:32:43, 9.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 138/2500 [22:36<6:24:40, 9.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 139/2500 [22:45<6:16:14, 9.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating 
workflow: 6%|▌ | 140/2500 [22:54<6:14:09, 9.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 141/2500 [23:03<6:06:41, 9.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 142/2500 [23:12<6:00:51, 9.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 143/2500 [23:21<6:00:57, 9.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 144/2500 [23:30<6:00:04, 9.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 145/2500 [23:39<5:55:38, 9.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 146/2500 [23:48<6:00:17, 9.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 147/2500 [23:57<5:51:11, 8.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 148/2500 [24:06<5:47:51, 8.87s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 149/2500 [24:15<5:53:03, 9.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 150/2500 [24:23<5:42:26, 8.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 151/2500 [24:32<5:50:05, 8.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 152/2500 [24:45<6:31:18, 10.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 153/2500 [24:55<6:30:37, 9.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 154/2500 [25:05<6:28:58, 9.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 155/2500 [25:14<6:16:48, 9.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 156/2500 [25:23<6:11:08, 9.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 6%|▋ | 157/2500 [25:32<6:07:09, 9.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▋ | 158/2500 [25:41<5:58:08, 9.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▋ | 159/2500 [25:50<5:58:06, 9.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▋ | 160/2500 [26:02<6:29:09, 9.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▋ | 161/2500 [26:11<6:25:19, 9.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▋ | 162/2500 [26:22<6:32:10, 10.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 163/2500 [26:31<6:25:26, 9.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 164/2500 [26:40<6:13:18, 9.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 165/2500 [26:49<6:00:02, 9.25s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 166/2500 [26:57<5:47:37, 8.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 167/2500 [27:07<6:01:34, 9.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 168/2500 [27:16<6:00:45, 9.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 169/2500 [27:25<5:54:21, 9.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 170/2500 [27:34<5:56:38, 9.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 171/2500 [27:43<5:46:03, 8.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 172/2500 [27:52<5:50:00, 9.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 173/2500 [28:02<6:04:38, 9.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 174/2500 [28:10<5:51:04, 9.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 175/2500 [28:19<5:46:00, 8.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 176/2500 [28:28<5:43:07, 8.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 177/2500 [28:36<5:35:59, 8.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 178/2500 [28:44<5:31:14, 8.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 179/2500 [28:52<5:20:34, 8.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 180/2500 [29:01<5:25:50, 8.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 181/2500 [29:16<6:42:46, 10.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 182/2500 [29:24<6:21:51, 9.88s/it]" ] 
}, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 183/2500 [29:33<6:07:20, 9.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 184/2500 [29:45<6:31:50, 10.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 185/2500 [29:58<7:04:50, 11.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 186/2500 [30:06<6:37:12, 10.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 187/2500 [30:15<6:20:14, 9.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 188/2500 [30:23<5:53:36, 9.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 189/2500 [30:31<5:44:43, 8.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 190/2500 [30:40<5:39:51, 8.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 191/2500 [30:47<5:25:04, 8.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 192/2500 [30:57<5:39:36, 8.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 193/2500 [31:05<5:32:54, 8.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 194/2500 [31:13<5:22:33, 8.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 195/2500 [31:21<5:21:27, 8.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 196/2500 [31:30<5:23:50, 8.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 197/2500 [31:38<5:25:14, 8.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 198/2500 [31:47<5:22:29, 8.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 199/2500 
[31:57<5:45:00, 9.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 200/2500 [32:07<5:50:06, 9.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 201/2500 [32:15<5:47:00, 9.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 202/2500 [32:24<5:45:08, 9.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 203/2500 [32:33<5:40:09, 8.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 204/2500 [32:41<5:34:57, 8.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 205/2500 [32:50<5:28:56, 8.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 206/2500 [32:58<5:27:55, 8.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 207/2500 [33:06<5:19:14, 8.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 
'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 208/2500 [33:15<5:32:27, 8.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 209/2500 [33:24<5:33:15, 8.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 210/2500 [33:33<5:31:14, 8.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 211/2500 [33:41<5:29:25, 8.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 212/2500 [33:51<5:35:14, 8.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▊ | 213/2500 [33:59<5:33:59, 8.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▊ | 214/2500 [34:09<5:42:02, 8.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▊ | 215/2500 [34:18<5:45:01, 9.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating 
workflow: 9%|▊ | 216/2500 [34:26<5:36:20, 8.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▊ | 217/2500 [34:35<5:35:23, 8.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▊ | 218/2500 [34:44<5:38:30, 8.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 219/2500 [34:55<6:03:34, 9.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 220/2500 [35:09<6:54:52, 10.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 221/2500 [35:18<6:28:30, 10.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 222/2500 [35:27<6:17:44, 9.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 223/2500 [35:37<6:12:26, 9.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 224/2500 [35:45<5:53:21, 9.32s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 225/2500 [35:53<5:43:38, 9.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 226/2500 [36:04<5:57:04, 9.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 227/2500 [36:13<5:56:35, 9.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 228/2500 [36:22<5:56:42, 9.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 229/2500 [36:31<5:46:00, 9.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 230/2500 [36:39<5:33:36, 8.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 231/2500 [36:48<5:32:11, 8.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 232/2500 [36:57<5:36:05, 8.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 9%|▉ | 233/2500 [37:06<5:44:38, 9.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 234/2500 [37:15<5:35:58, 8.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 235/2500 [37:24<5:34:25, 8.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 236/2500 [37:33<5:36:39, 8.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 237/2500 [37:41<5:29:47, 8.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|▉ | 238/2500 [37:51<5:42:17, 9.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|▉ | 239/2500 [37:59<5:27:24, 8.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|▉ | 240/2500 [38:07<5:24:55, 8.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|▉ | 241/2500 [38:17<5:40:24, 9.04s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|▉ | 242/2500 [38:27<5:44:35, 9.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|▉ | 243/2500 [38:36<5:42:56, 9.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|▉ | 244/2500 [38:45<5:40:26, 9.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|▉ | 245/2500 [38:53<5:34:53, 8.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|▉ | 246/2500 [39:03<5:47:38, 9.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|▉ | 247/2500 [39:15<6:13:44, 9.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|▉ | 248/2500 [39:24<6:03:00, 9.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|▉ | 249/2500 [39:34<6:04:18, 9.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": 
"stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 250/2500 [39:44<6:14:15, 9.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 251/2500 [39:54<6:07:44, 9.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 252/2500 [40:03<6:06:51, 9.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 253/2500 [40:14<6:14:19, 10.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 254/2500 [40:23<5:59:58, 9.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 255/2500 [40:32<5:57:54, 9.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 256/2500 [40:41<5:56:34, 9.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 257/2500 [40:52<6:03:52, 9.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 258/2500 
[41:01<6:04:21, 9.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 259/2500 [41:11<5:58:40, 9.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 260/2500 [41:21<6:06:50, 9.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 261/2500 [41:30<5:58:39, 9.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 262/2500 [41:40<6:02:07, 9.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 263/2500 [41:49<5:58:00, 9.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 264/2500 [42:01<6:18:06, 10.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 265/2500 [42:11<6:14:49, 10.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 266/2500 [42:20<6:11:08, 9.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics 
{'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 267/2500 [42:30<6:03:01, 9.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 268/2500 [42:40<6:12:52, 10.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 269/2500 [42:49<5:59:13, 9.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 270/2500 [42:59<5:55:31, 9.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 271/2500 [43:08<5:50:57, 9.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 272/2500 [43:17<5:49:46, 9.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 273/2500 [43:26<5:44:11, 9.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 274/2500 [43:35<5:39:27, 9.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", 
"Evaluating workflow: 11%|█ | 275/2500 [43:43<5:32:25, 8.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 276/2500 [43:51<5:22:43, 8.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 277/2500 [44:00<5:24:39, 8.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 278/2500 [44:09<5:26:04, 8.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 279/2500 [44:19<5:32:58, 9.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 280/2500 [44:28<5:35:18, 9.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 281/2500 [44:37<5:31:26, 8.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█▏ | 282/2500 [44:45<5:27:32, 8.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█▏ | 283/2500 [44:54<5:31:06, 8.96s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█▏ | 284/2500 [45:03<5:27:54, 8.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█▏ | 285/2500 [45:12<5:25:25, 8.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█▏ | 286/2500 [45:21<5:28:48, 8.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█▏ | 287/2500 [45:30<5:29:00, 8.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 288/2500 [45:38<5:23:59, 8.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 289/2500 [45:47<5:19:54, 8.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 290/2500 [45:55<5:18:12, 8.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 291/2500 [46:17<7:39:54, 12.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 292/2500 [46:26<6:57:45, 11.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 293/2500 [46:35<6:33:48, 10.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 294/2500 [46:44<6:13:29, 10.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 295/2500 [46:52<5:53:48, 9.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 296/2500 [47:01<5:51:16, 9.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 297/2500 [47:09<5:34:00, 9.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 298/2500 [47:18<5:29:07, 8.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 299/2500 [47:27<5:25:54, 8.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ 
| 300/2500 [47:35<5:15:03, 8.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 966881.915349768)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 966906.383591074)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 966919.591761252)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 966894.174150227)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 966932.696866258)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 966945.477407908)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 966958.344591251)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 966970.621421472)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967009.27416208)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 966983.249780638)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 966996.285348395)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 967020.892322156)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed 
connector\n", "connections: ['deque([(, 967045.417992219)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967033.907206892)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 967058.153129869)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 967069.780172525)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967097.121459345)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967109.278230727)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967083.132530152)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 967122.481029164)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967145.937779695)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 967170.885952551)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967134.296336077)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967158.565998344)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967196.930568276)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967210.04126482)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: 
['deque([(, 967183.853212678)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967236.244010401)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 967244.304566302)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967253.031279163)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967224.337452898)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967261.612512242)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967270.956696326)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967279.196274839)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 967289.284733614)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967298.23346982)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967307.10969998)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967315.598943544)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967324.11361078)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967333.478879893)])']\n", "connector: \n", "Unclosed client 
session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967351.815190368)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967360.026479172)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967342.290808734)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967369.144771955)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967378.122807212)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 967386.762421742)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 967395.557033921)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967404.629938983)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967412.538871039)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 967421.351649674)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 967430.000725129)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967448.269851338)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967468.229613412)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 967456.867292778)])']\n", "connector: \n", 
"Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967438.903998063)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967476.794362188)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967485.108347964)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 967494.079217212)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967504.416293627)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967522.066009153)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967531.781338458)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967512.646950257)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967541.225476932)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967549.64971991)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967559.568048851)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967569.152328807)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967577.492747074)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed 
connector\n", "connections: ['deque([(, 967586.622882107)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967595.101193569)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967604.438278964)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967613.666339988)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967623.537254669)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967640.353616724)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967632.025730784)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967649.028681899)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967677.00012802)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967657.598140324)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 967666.548112709)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967703.734829199)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967685.741621633)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 967695.016703743)])']\n", "connector: 
\n", "Unclosed connector\n", "connections: ['deque([(, 967712.081302511)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967721.299095835)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967729.927058052)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 967738.854275147)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967749.168795914)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967757.91438226)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967766.223785675)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967775.539471121)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967793.874641433)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967812.074278056)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967784.908645104)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 967802.557887858)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967822.849282193)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
967832.893277894)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967841.557008368)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967850.562228408)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967867.96862106)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967877.262487973)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967887.400638077)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967858.925648544)])']\n", "connector: \n", "Evaluating workflow: 12%|█▏ | 301/2500 [47:44<5:25:01, 8.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 302/2500 [47:53<5:19:58, 8.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 303/2500 [48:01<5:17:01, 8.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 304/2500 [48:09<5:13:21, 8.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 305/2500 [48:19<5:20:53, 8.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics 
{'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 306/2500 [48:26<5:08:06, 8.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 307/2500 [48:35<5:15:32, 8.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 308/2500 [48:47<5:44:55, 9.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 309/2500 [48:55<5:35:50, 9.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 310/2500 [49:04<5:31:19, 9.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 311/2500 [49:13<5:28:56, 9.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 312/2500 [49:26<6:08:57, 10.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 313/2500 [49:34<5:46:15, 9.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 13%|█▎ | 314/2500 [49:42<5:34:12, 9.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 315/2500 [49:51<5:26:12, 8.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 316/2500 [49:59<5:20:18, 8.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 317/2500 [50:08<5:22:35, 8.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 318/2500 [50:17<5:19:57, 8.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 319/2500 [50:26<5:27:41, 9.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 320/2500 [50:35<5:21:19, 8.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 321/2500 [50:42<5:09:19, 8.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 322/2500 [50:51<5:06:09, 8.43s/it]" ] }, { 
"name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 323/2500 [51:00<5:16:26, 8.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 324/2500 [51:10<5:31:31, 9.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 325/2500 [51:18<5:20:46, 8.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 326/2500 [51:27<5:20:23, 8.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 327/2500 [51:36<5:19:54, 8.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 328/2500 [51:45<5:15:49, 8.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 329/2500 [51:53<5:11:57, 8.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 330/2500 [52:06<5:58:20, 9.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 
1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 331/2500 [52:14<5:39:20, 9.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 332/2500 [52:28<6:28:37, 10.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 333/2500 [52:37<6:07:03, 10.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 334/2500 [52:46<5:57:20, 9.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 335/2500 [52:55<5:46:40, 9.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 336/2500 [53:03<5:28:48, 9.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 337/2500 [53:12<5:31:43, 9.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▎ | 338/2500 [53:21<5:23:23, 8.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating 
workflow: 14%|█▎ | 339/2500 [53:30<5:21:22, 8.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▎ | 340/2500 [53:38<5:17:06, 8.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▎ | 341/2500 [53:47<5:12:55, 8.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▎ | 342/2500 [53:56<5:17:35, 8.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▎ | 343/2500 [54:05<5:20:24, 8.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 344/2500 [54:15<5:38:54, 9.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 345/2500 [54:29<6:24:32, 10.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 346/2500 [54:41<6:32:56, 10.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 347/2500 [54:50<6:15:34, 10.47s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 348/2500 [54:58<5:50:50, 9.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 349/2500 [55:07<5:39:22, 9.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 350/2500 [55:15<5:26:12, 9.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 351/2500 [55:23<5:17:00, 8.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 352/2500 [55:32<5:15:28, 8.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 353/2500 [55:40<5:08:06, 8.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 354/2500 [55:49<5:10:31, 8.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 355/2500 [55:59<5:23:52, 9.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 356/2500 [56:10<5:42:00, 9.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 357/2500 [56:19<5:39:09, 9.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 358/2500 [56:28<5:37:49, 9.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 359/2500 [56:37<5:22:52, 9.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 360/2500 [56:45<5:20:09, 8.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 361/2500 [56:54<5:11:15, 8.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 362/2500 [57:02<5:09:09, 8.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▍ | 363/2500 [57:11<5:09:57, 8.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▍ | 
364/2500 [57:20<5:11:09, 8.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▍ | 365/2500 [57:29<5:16:03, 8.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▍ | 366/2500 [57:39<5:25:04, 9.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▍ | 367/2500 [57:47<5:17:48, 8.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▍ | 368/2500 [57:58<5:34:29, 9.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▍ | 369/2500 [58:06<5:25:55, 9.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▍ | 370/2500 [58:15<5:21:52, 9.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▍ | 371/2500 [58:24<5:19:08, 8.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▍ | 372/2500 [58:33<5:20:46, 9.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": 
[ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▍ | 373/2500 [58:41<5:13:57, 8.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▍ | 374/2500 [58:50<5:10:22, 8.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 375/2500 [58:59<5:12:30, 8.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 376/2500 [59:08<5:13:37, 8.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 377/2500 [59:17<5:20:10, 9.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 378/2500 [59:26<5:14:30, 8.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 379/2500 [59:39<5:54:40, 10.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 380/2500 [59:47<5:40:38, 9.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 381/2500 [59:56<5:30:38, 9.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 382/2500 [1:00:04<5:17:12, 8.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 383/2500 [1:00:14<5:23:31, 9.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 384/2500 [1:00:22<5:16:20, 8.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 385/2500 [1:00:31<5:13:21, 8.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 386/2500 [1:00:40<5:16:10, 8.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 387/2500 [1:00:49<5:13:22, 8.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 388/2500 [1:00:59<5:22:49, 9.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 389/2500 
[1:01:07<5:14:06, 8.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 390/2500 [1:01:16<5:13:26, 8.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 391/2500 [1:01:24<5:03:39, 8.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 392/2500 [1:01:33<5:07:57, 8.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 393/2500 [1:01:42<5:09:00, 8.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 394/2500 [1:01:50<4:56:51, 8.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 395/2500 [1:01:58<4:58:27, 8.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 396/2500 [1:02:06<4:55:31, 8.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 397/2500 [1:02:15<4:54:56, 8.41s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 398/2500 [1:02:23<4:56:51, 8.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 399/2500 [1:02:33<5:09:10, 8.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 400/2500 [1:02:41<5:04:49, 8.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 401/2500 [1:02:53<5:32:13, 9.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 402/2500 [1:03:03<5:40:39, 9.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 403/2500 [1:03:12<5:27:48, 9.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 404/2500 [1:03:21<5:26:41, 9.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 405/2500 [1:03:30<5:18:15, 9.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 406/2500 [1:03:38<5:11:51, 8.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▋ | 407/2500 [1:03:46<5:00:37, 8.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▋ | 408/2500 [1:03:56<5:14:51, 9.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▋ | 409/2500 [1:04:04<5:07:53, 8.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▋ | 410/2500 [1:04:14<5:12:57, 8.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▋ | 411/2500 [1:04:24<5:24:57, 9.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▋ | 412/2500 [1:04:32<5:12:12, 8.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 413/2500 [1:04:41<5:08:52, 8.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating 
workflow: 17%|█▋ | 414/2500 [1:04:50<5:15:34, 9.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 415/2500 [1:04:59<5:13:17, 9.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 416/2500 [1:05:09<5:25:28, 9.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 417/2500 [1:05:18<5:18:25, 9.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 418/2500 [1:05:26<5:11:48, 8.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 419/2500 [1:05:36<5:18:12, 9.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 420/2500 [1:05:45<5:18:04, 9.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 421/2500 [1:05:54<5:11:29, 8.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 422/2500 [1:06:04<5:27:50, 9.47s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 423/2500 [1:06:15<5:37:14, 9.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 424/2500 [1:06:25<5:46:36, 10.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 425/2500 [1:06:37<6:03:22, 10.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 426/2500 [1:06:46<5:48:22, 10.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 427/2500 [1:06:56<5:44:18, 9.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 428/2500 [1:07:07<6:01:22, 10.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 429/2500 [1:07:16<5:40:57, 9.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 430/2500 [1:07:26<5:37:32, 9.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 
0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 431/2500 [1:07:35<5:31:19, 9.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 432/2500 [1:07:44<5:26:46, 9.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 433/2500 [1:07:54<5:32:53, 9.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 434/2500 [1:08:05<5:43:24, 9.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 435/2500 [1:08:16<6:00:26, 10.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 436/2500 [1:08:25<5:45:42, 10.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 437/2500 [1:08:35<5:43:43, 10.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 438/2500 [1:08:45<5:41:23, 9.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 18%|█▊ | 439/2500 [1:08:54<5:33:31, 9.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 440/2500 [1:09:05<5:46:40, 10.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 441/2500 [1:09:14<5:37:32, 9.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 442/2500 [1:09:24<5:31:44, 9.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 443/2500 [1:09:33<5:31:40, 9.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 444/2500 [1:09:43<5:32:23, 9.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 445/2500 [1:09:53<5:38:21, 9.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 446/2500 [1:10:03<5:30:01, 9.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 447/2500 [1:10:12<5:32:13, 
9.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 448/2500 [1:10:22<5:30:06, 9.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 449/2500 [1:10:31<5:26:09, 9.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 450/2500 [1:10:40<5:16:12, 9.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 451/2500 [1:10:50<5:21:30, 9.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 452/2500 [1:11:00<5:32:20, 9.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 453/2500 [1:11:10<5:35:56, 9.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 454/2500 [1:11:21<5:50:30, 10.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 455/2500 [1:11:31<5:41:14, 10.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 456/2500 [1:11:45<6:20:40, 11.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 457/2500 [1:11:55<6:14:43, 11.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 458/2500 [1:12:06<6:08:16, 10.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 459/2500 [1:12:15<5:54:08, 10.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 460/2500 [1:12:25<5:43:41, 10.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 461/2500 [1:12:36<5:53:45, 10.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 462/2500 [1:12:45<5:42:21, 10.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▊ | 463/2500 [1:12:57<6:01:11, 10.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": 
"stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▊ | 464/2500 [1:13:07<5:52:32, 10.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▊ | 465/2500 [1:13:17<5:53:53, 10.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▊ | 466/2500 [1:13:27<5:42:16, 10.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▊ | 467/2500 [1:13:36<5:36:27, 9.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▊ | 468/2500 [1:13:45<5:24:59, 9.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 469/2500 [1:13:56<5:42:48, 10.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 470/2500 [1:14:06<5:34:57, 9.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 471/2500 [1:14:18<5:58:50, 10.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
19%|█▉ | 472/2500 [1:14:27<5:45:08, 10.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 473/2500 [1:14:38<5:44:57, 10.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 474/2500 [1:14:48<5:44:07, 10.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 475/2500 [1:14:58<5:47:04, 10.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 476/2500 [1:15:09<5:48:27, 10.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 477/2500 [1:15:20<5:54:40, 10.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 478/2500 [1:15:29<5:42:42, 10.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 479/2500 [1:15:38<5:34:11, 9.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 480/2500 [1:15:50<5:52:49, 10.48s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 481/2500 [1:16:01<5:59:19, 10.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 482/2500 [1:16:13<6:11:37, 11.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 483/2500 [1:16:24<6:07:59, 10.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 484/2500 [1:16:33<5:54:32, 10.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 485/2500 [1:16:43<5:42:50, 10.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 486/2500 [1:16:53<5:37:58, 10.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 487/2500 [1:17:02<5:27:01, 9.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 488/2500 [1:17:11<5:23:34, 9.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 
1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 489/2500 [1:17:21<5:28:45, 9.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 490/2500 [1:17:31<5:27:48, 9.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 491/2500 [1:17:40<5:25:06, 9.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 492/2500 [1:17:50<5:27:09, 9.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 493/2500 [1:18:01<5:34:36, 10.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 494/2500 [1:18:10<5:26:52, 9.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 495/2500 [1:18:21<5:40:31, 10.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 496/2500 [1:18:30<5:27:43, 9.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 20%|█▉ | 497/2500 [1:18:41<5:35:37, 10.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 498/2500 [1:18:50<5:30:31, 9.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 499/2500 [1:19:00<5:27:45, 9.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 500/2500 [1:19:09<5:21:22, 9.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 501/2500 [1:19:18<5:14:59, 9.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 502/2500 [1:19:27<5:09:56, 9.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 503/2500 [1:19:37<5:13:46, 9.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 504/2500 [1:19:47<5:19:39, 9.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 505/2500 [1:19:56<5:15:31, 
9.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 506/2500 [1:20:06<5:14:46, 9.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 507/2500 [1:20:16<5:23:45, 9.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 508/2500 [1:20:25<5:16:10, 9.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 509/2500 [1:20:34<5:13:09, 9.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 510/2500 [1:20:44<5:11:57, 9.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 511/2500 [1:20:53<5:11:07, 9.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 512/2500 [1:21:01<4:57:59, 8.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 513/2500 [1:21:13<5:27:37, 9.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 514/2500 [1:21:22<5:18:32, 9.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 515/2500 [1:21:32<5:23:06, 9.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 516/2500 [1:21:44<5:45:33, 10.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 517/2500 [1:21:53<5:28:54, 9.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 518/2500 [1:22:03<5:31:51, 10.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 519/2500 [1:22:13<5:29:49, 9.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 520/2500 [1:22:23<5:31:24, 10.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 521/2500 [1:22:34<5:35:09, 10.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 522/2500 [1:22:44<5:35:15, 10.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 523/2500 [1:22:54<5:30:19, 10.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 524/2500 [1:23:03<5:23:14, 9.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 525/2500 [1:23:13<5:25:58, 9.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 526/2500 [1:23:23<5:29:39, 10.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 527/2500 [1:23:33<5:31:00, 10.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 528/2500 [1:23:44<5:33:58, 10.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 529/2500 [1:23:52<5:18:37, 9.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 
530/2500 [1:24:02<5:15:58, 9.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 531/2500 [1:24:12<5:22:36, 9.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██▏ | 532/2500 [1:24:22<5:20:40, 9.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██▏ | 533/2500 [1:24:32<5:21:07, 9.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██▏ | 534/2500 [1:24:40<5:10:09, 9.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██▏ | 535/2500 [1:24:50<5:12:06, 9.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██▏ | 536/2500 [1:25:00<5:16:10, 9.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██▏ | 537/2500 [1:25:09<5:09:25, 9.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 538/2500 [1:25:19<5:09:46, 9.47s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 539/2500 [1:25:28<5:09:48, 9.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 540/2500 [1:25:37<5:07:30, 9.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 541/2500 [1:25:47<5:10:26, 9.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 542/2500 [1:25:58<5:24:00, 9.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 543/2500 [1:26:09<5:32:21, 10.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 544/2500 [1:26:19<5:36:10, 10.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 545/2500 [1:26:29<5:30:53, 10.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 546/2500 [1:26:38<5:20:34, 9.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 
0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 547/2500 [1:26:47<5:07:06, 9.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 548/2500 [1:26:57<5:19:13, 9.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 549/2500 [1:27:07<5:15:36, 9.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 550/2500 [1:27:15<5:03:55, 9.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 551/2500 [1:27:24<4:57:19, 9.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 552/2500 [1:27:36<5:21:09, 9.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 553/2500 [1:27:44<5:08:50, 9.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 554/2500 [1:27:53<4:56:00, 9.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 555/2500 [1:28:02<4:59:53, 9.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 556/2500 [1:28:12<5:04:45, 9.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 557/2500 [1:28:20<4:56:52, 9.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 558/2500 [1:28:29<4:54:37, 9.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 559/2500 [1:28:40<5:08:25, 9.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 560/2500 [1:28:50<5:11:59, 9.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 561/2500 [1:28:59<5:06:40, 9.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 562/2500 [1:29:08<4:59:57, 9.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 
563/2500 [1:29:18<5:12:42, 9.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 564/2500 [1:29:32<5:50:22, 10.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 565/2500 [1:29:42<5:43:40, 10.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 566/2500 [1:29:52<5:31:44, 10.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 567/2500 [1:30:01<5:21:47, 9.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 568/2500 [1:30:10<5:14:07, 9.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 569/2500 [1:30:20<5:10:49, 9.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 570/2500 [1:30:29<5:09:26, 9.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 571/2500 [1:30:39<5:10:10, 9.65s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 572/2500 [1:30:48<5:06:05, 9.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 573/2500 [1:30:59<5:21:09, 10.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 574/2500 [1:31:08<5:13:58, 9.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 575/2500 [1:31:20<5:26:32, 10.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 576/2500 [1:31:30<5:25:24, 10.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 577/2500 [1:31:40<5:22:58, 10.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 578/2500 [1:31:50<5:23:52, 10.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 579/2500 [1:31:59<5:15:00, 9.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 580/2500 [1:32:08<5:10:55, 9.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 581/2500 [1:32:17<5:04:03, 9.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 582/2500 [1:32:26<4:57:01, 9.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 583/2500 [1:32:36<5:04:04, 9.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 584/2500 [1:32:45<4:56:08, 9.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 585/2500 [1:32:54<4:55:27, 9.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 586/2500 [1:33:04<5:04:58, 9.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 587/2500 [1:33:13<4:57:51, 9.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 24%|██▎ | 588/2500 [1:33:23<5:00:01, 9.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▎ | 589/2500 [1:33:31<4:52:28, 9.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▎ | 590/2500 [1:33:42<5:01:10, 9.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▎ | 591/2500 [1:33:52<5:07:51, 9.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▎ | 592/2500 [1:34:00<4:58:51, 9.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▎ | 593/2500 [1:34:12<5:23:05, 10.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 594/2500 [1:34:22<5:15:31, 9.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 595/2500 [1:34:32<5:17:07, 9.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 596/2500 
[1:34:41<5:08:15, 9.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 597/2500 [1:34:51<5:09:12, 9.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 598/2500 [1:35:01<5:10:58, 9.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 599/2500 [1:35:13<5:33:21, 10.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 600/2500 [1:35:23<5:23:53, 10.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 601/2500 [1:35:32<5:15:28, 9.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 602/2500 [1:35:42<5:16:41, 10.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 603/2500 [1:35:51<5:09:02, 9.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 604/2500 [1:36:01<5:06:00, 9.68s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 605/2500 [1:36:11<5:13:19, 9.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 606/2500 [1:36:20<5:03:41, 9.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 607/2500 [1:36:29<4:56:55, 9.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 608/2500 [1:36:38<4:57:29, 9.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 609/2500 [1:36:48<5:02:35, 9.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 610/2500 [1:36:58<5:05:31, 9.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 611/2500 [1:37:08<5:05:59, 9.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 612/2500 [1:37:19<5:13:21, 9.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ | 613/2500 [1:37:28<5:06:06, 9.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ | 614/2500 [1:37:38<5:10:01, 9.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ | 615/2500 [1:37:49<5:19:14, 10.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ | 616/2500 [1:38:00<5:25:23, 10.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ | 617/2500 [1:38:09<5:15:47, 10.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ | 618/2500 [1:38:19<5:15:58, 10.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ | 619/2500 [1:38:29<5:15:43, 10.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ | 620/2500 [1:38:40<5:20:20, 10.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 25%|██▍ | 621/2500 [1:38:50<5:20:52, 10.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ | 622/2500 [1:38:59<5:10:15, 9.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ | 623/2500 [1:39:15<6:05:38, 11.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ | 624/2500 [1:39:25<5:51:44, 11.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 625/2500 [1:39:36<5:43:22, 10.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 626/2500 [1:39:46<5:32:08, 10.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 627/2500 [1:39:54<5:16:09, 10.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 628/2500 [1:40:05<5:17:37, 10.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 629/2500 
[1:40:14<5:11:32, 9.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 630/2500 [1:40:24<5:09:27, 9.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 631/2500 [1:40:34<5:08:48, 9.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 632/2500 [1:40:44<5:11:41, 10.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 633/2500 [1:40:54<5:12:29, 10.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 634/2500 [1:41:04<5:09:41, 9.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 635/2500 [1:41:14<5:12:14, 10.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 636/2500 [1:41:25<5:13:30, 10.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 637/2500 [1:41:35<5:19:42, 10.30s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 638/2500 [1:41:45<5:12:09, 10.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 639/2500 [1:41:53<4:57:16, 9.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 640/2500 [1:42:03<4:57:32, 9.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 641/2500 [1:42:14<5:09:18, 9.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 642/2500 [1:42:24<5:06:49, 9.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 643/2500 [1:42:33<5:06:02, 9.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 644/2500 [1:42:43<4:58:56, 9.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 645/2500 [1:42:52<4:58:37, 9.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 
1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 646/2500 [1:43:02<5:03:08, 9.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 647/2500 [1:43:13<5:10:50, 10.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 648/2500 [1:43:22<5:00:52, 9.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 649/2500 [1:43:32<5:00:29, 9.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 650/2500 [1:43:41<4:58:52, 9.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 651/2500 [1:43:51<4:56:05, 9.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 652/2500 [1:44:01<5:00:25, 9.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 653/2500 [1:44:14<5:27:47, 10.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 654/2500 [1:44:23<5:14:32, 10.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 655/2500 [1:44:32<5:00:20, 9.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 656/2500 [1:44:41<4:54:05, 9.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▋ | 657/2500 [1:44:50<4:50:29, 9.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▋ | 658/2500 [1:44:59<4:50:35, 9.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▋ | 659/2500 [1:45:11<5:06:50, 10.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▋ | 660/2500 [1:45:20<5:02:19, 9.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▋ | 661/2500 [1:45:29<4:52:18, 9.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▋ | 662/2500 
[1:45:39<4:56:41, 9.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 663/2500 [1:45:48<4:52:44, 9.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 664/2500 [1:45:59<5:04:21, 9.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 665/2500 [1:46:08<4:59:20, 9.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 666/2500 [1:46:18<4:55:38, 9.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 667/2500 [1:46:29<5:05:36, 10.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 668/2500 [1:46:39<5:07:11, 10.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 669/2500 [1:46:48<5:00:08, 9.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 670/2500 [1:46:57<4:55:27, 9.69s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 671/2500 [1:47:06<4:43:42, 9.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 672/2500 [1:47:17<4:56:05, 9.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 673/2500 [1:47:27<4:58:49, 9.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 674/2500 [1:47:37<5:02:09, 9.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 675/2500 [1:47:47<5:03:06, 9.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 676/2500 [1:47:57<5:00:17, 9.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 677/2500 [1:48:15<6:14:39, 12.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 678/2500 [1:48:24<5:49:47, 11.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 
1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 679/2500 [1:48:34<5:34:32, 11.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 680/2500 [1:48:45<5:33:28, 10.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 681/2500 [1:48:54<5:14:56, 10.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 682/2500 [1:49:04<5:12:30, 10.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 683/2500 [1:49:14<5:06:53, 10.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 684/2500 [1:49:23<5:00:25, 9.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 685/2500 [1:49:33<4:58:20, 9.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 686/2500 [1:49:43<5:01:43, 9.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 27%|██▋ | 687/2500 [1:49:52<4:46:55, 9.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 688/2500 [1:50:00<4:39:49, 9.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 689/2500 [1:50:10<4:44:43, 9.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 690/2500 [1:50:20<4:46:46, 9.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 691/2500 [1:50:29<4:44:36, 9.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 692/2500 [1:50:39<4:49:00, 9.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 693/2500 [1:50:49<4:53:56, 9.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 694/2500 [1:50:58<4:48:43, 9.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 695/2500 
[1:51:08<4:44:51, 9.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 696/2500 [1:51:16<4:38:35, 9.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 697/2500 [1:51:25<4:32:30, 9.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 698/2500 [1:51:34<4:31:35, 9.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 699/2500 [1:51:45<4:45:58, 9.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 700/2500 [1:51:55<4:55:15, 9.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 701/2500 [1:52:06<5:00:12, 10.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 702/2500 [1:52:15<4:56:32, 9.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 703/2500 [1:52:24<4:43:54, 9.48s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 704/2500 [1:52:33<4:37:35, 9.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967905.671034932)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967915.376336911)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967895.601086305)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967924.067907272)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967941.97850148)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967932.659623663)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967950.481105909)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967968.860026481)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967977.298617017)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 967986.509782071)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967995.246855763)])']\n", "connector: \n", "Unclosed client session\n", 
"client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968013.934176576)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968023.350183674)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968005.082747661)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968032.612512146)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968041.619616307)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968049.714029333)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968058.560170023)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968067.654392873)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968085.920893488)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968095.049831109)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968076.300286162)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968111.880932024)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968120.481756953)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
968103.314337366)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 968129.735185504)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968156.114463486)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 968137.809145141)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 968147.516157306)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 968164.108233671)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968172.456257596)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968188.275887365)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968197.445196038)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968206.414827823)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968216.638859584)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 968225.943532936)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968253.304869581)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968262.150362283)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client 
session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968235.014178329)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 968244.412784781)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968271.354189089)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968280.480262908)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968298.755996295)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968289.286374941)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968315.863098216)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968325.1914545)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968307.178738309)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 968333.311407874)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968355.184490839)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968365.139692407)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968342.718256151)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: 
['deque([(, 968383.924263074)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 968393.094979734)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 968402.269028591)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968410.914947872)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968431.945193516)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968441.610289687)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968420.099931508)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968452.094792225)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968478.934026641)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968461.597213678)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 968470.468004747)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968487.135087132)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968497.280466674)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968506.521903965)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
968524.598164517)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968532.887408246)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968542.154089786)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968578.021767099)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968560.694722383)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 968569.327364035)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968586.278225873)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968594.5597191)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968602.212057515)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968610.966080208)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968626.03980312)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968643.316958038)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968654.958340911)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968634.671155981)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", 
"connections: ['deque([(, 968667.976081192)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968676.613489637)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 968685.460566436)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968693.035386724)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968701.456737279)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968717.55755441)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968709.997253171)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968735.537479947)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968727.276493472)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968743.309644698)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 968751.619909501)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968768.77381772)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968787.395704418)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: 
['deque([(, 968760.20687978)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 968777.020567835)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968796.848057979)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968814.63182612)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968805.725534047)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968831.668233748)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968839.909453672)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968848.432666135)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968856.265052438)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968865.783684757)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968874.569843264)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 968883.133319716)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968909.517050446)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968918.996524126)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
968900.823185439)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968928.248420336)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968936.560577212)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968954.426115543)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968945.32636731)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968965.537956751)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968979.61552331)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 968988.234860475)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 968997.532574361)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969015.183285962)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969023.657637055)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969007.03119963)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969033.916126115)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969061.237966037)])']\n", "connector: \n", "Unclosed client session\n", 
"client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969043.308988125)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 969052.74615035)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969069.30008963)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 969078.005806554)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 969087.147121302)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969113.947970795)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969123.016872888)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969131.344927408)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969105.175705667)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969141.206876138)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 969148.983517348)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 969157.464375501)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 969167.475172635)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969194.832474985)])']\n", "connector: \n", "Unclosed 
client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969203.408142814)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969176.899713667)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 969185.923267923)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969225.047033883)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969234.062220049)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969213.462573161)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969254.472688948)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969263.889146413)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 969273.634948196)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969292.838700992)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969311.745941972)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969302.284342693)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969321.944984625)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", 
"connections: ['deque([(, 969331.735713117)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969340.994524303)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969351.34081831)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969370.385373511)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969379.739975423)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969391.154772595)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969360.450948148)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969401.022464897)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969410.769455964)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969420.025588511)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969439.492427989)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969448.835142611)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 969458.006386304)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969476.293039867)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", 
"Unclosed connector\n", "connections: ['deque([(, 969493.686447918)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969485.155035346)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969501.791384538)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969510.685651215)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969529.028441558)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969519.58907287)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969538.246949471)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969546.974043182)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 969555.598256746)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 969564.793538658)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969591.280598356)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969573.479102193)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 969582.147069649)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 
969600.223419745)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969608.703154317)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969617.135472987)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969625.676486371)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969647.157591075)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969655.850798943)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969665.049410889)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969673.93052219)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969691.730631725)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969682.318236363)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 969699.740988362)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969708.407062729)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969717.09720299)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969725.007929318)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", 
"connections: ['deque([(, 969734.519812631)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 967960.25762077)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 968374.999770127)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 968515.268756102)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 968552.445863192)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 968823.222501489)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 968891.666220341)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 969096.805219052)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 969243.864121309)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 969284.105581792)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 969430.676488511)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 969467.360650832)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969751.422141035)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969759.759215146)])']\n", "connector: \n", 
"Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969742.942635428)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 969769.020929836)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969805.709988793)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969776.639439495)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 969785.756887523)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 969797.084421584)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969836.070431982)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 969844.127279828)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969814.508757901)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 969823.382202832)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969860.99463401)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 969878.44839515)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969852.539464608)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969869.425892598)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", 
"connections: ['deque([(, 969896.608117465)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969905.051933705)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969887.086897309)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969912.808208199)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969921.047209762)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969930.439810691)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969940.560764218)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969948.72767742)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969957.555437798)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 969966.366164952)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 969974.836832974)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970004.299672269)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 969983.219146878)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 969996.12846345)])']\n", "connector: \n", 
"Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970018.247050567)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970027.028296463)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970036.310227158)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970045.238767729)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970053.209587775)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970071.054895269)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970062.610490229)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 970079.8573754)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970088.399633543)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970105.975406012)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 970115.080141544)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970096.833346317)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970139.405045207)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970125.723358712)])']\n", 
"connector: \n", "Unclosed connector\n", "connections: ['deque([(, 970150.907382129)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970168.44061551)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970177.171465195)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970185.427333793)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970160.25636007)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970210.551319302)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970193.689794435)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970202.411468223)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970249.425620653)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 970219.400674642)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970229.341131305)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 970240.106316788)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970275.702502033)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", 
"Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970258.812415162)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 970266.893416776)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 970283.860038117)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970292.408089299)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970301.172967031)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970310.001529458)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970319.214864037)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970337.428603212)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970347.948112184)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970328.956485747)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 970356.571429709)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970383.369198107)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970365.383067576)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970374.206498455)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", 
"connections: ['deque([(, 970391.786837521)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970400.320138281)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970418.236565036)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970409.293020328)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970427.726735507)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970448.95032687)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970457.674734645)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970436.255558885)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970466.38678672)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970474.495753025)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 970484.091774392)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970492.597608408)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970501.299721571)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970510.469677098)])']\n", "connector: \n", 
"Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970528.999799933)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970519.192873261)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970546.238211412)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970563.298892269)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970537.360354011)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 970554.238108156)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970572.177923524)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 970579.83769902)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 970588.459519499)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 970596.701391876)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970605.08729467)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970623.357622592)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970613.697520513)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970631.78681023)])']\n", 
"connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970643.120582957)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970653.436030392)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970661.968230751)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970671.256542469)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970679.817974316)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970696.212195304)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970688.335172596)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970706.204721212)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970714.583300281)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970723.916827219)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 970734.065243603)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970760.395407363)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: 
['deque([(, 970769.267852938)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 970742.192295855)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 970750.857905685)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970779.466768025)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970796.726877284)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970788.175750097)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 970806.342268294)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970824.075791325)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970834.652708513)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970815.519083118)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970845.039437537)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970855.699141098)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970867.349333251)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970876.427486327)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970906.26785741)])']\n", "connector: \n", "Unclosed client session\n", 
"client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970915.831316128)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970886.129225789)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 970897.75858666)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970925.03069145)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970934.214893079)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970954.999685266)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970944.30207463)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970975.699461089)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970985.573563457)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970995.359655368)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 970966.637811247)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 971024.776170566)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971004.547229969)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 
971015.549074269)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971034.064361737)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971043.74457609)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971053.504855801)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971063.801119135)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971101.556835768)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971072.885212874)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971082.755282755)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 971092.274194318)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971110.143702858)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971119.931451654)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971140.523003956)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971130.418785231)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971151.809218618)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", 
"connections: ['deque([(, 971161.199298736)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971175.086131578)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971196.08704689)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971205.541902401)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971214.944545308)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971185.696562453)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971235.365828074)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971257.11713498)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 971226.057559461)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971247.310331774)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971276.964334535)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971295.323066884)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 971267.656311628)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 971286.505080504)])']\n", "connector: \n", 
"Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971306.6888594)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971316.060105481)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971328.330498716)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971337.608308594)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971347.818049789)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971368.462684517)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971357.963225625)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971389.861503131)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971399.215367795)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971408.558868407)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971378.900173291)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971420.340948708)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971431.483013177)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971443.397352917)])']\n", 
"connector: \n", "Unclosed connector\n", "connections: ['deque([(, 971454.10387084)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971473.142964095)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971482.88553274)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971463.734778406)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971501.303827873)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971511.484184482)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971521.215125831)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971491.882450784)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971540.678050146)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971551.212365341)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971560.460358979)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971530.747175925)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971571.615944316)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
971600.723731957)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971580.544567536)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 971591.162653708)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971610.368601024)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971628.593567822)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971619.574753944)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 971637.557497488)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971647.265449414)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971657.297827045)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971666.508623239)])']\n", "connector: \n", "Evaluating workflow: 28%|██▊ | 705/2500 [1:52:44<4:59:36, 10.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 706/2500 [1:52:54<4:59:55, 10.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 707/2500 [1:53:04<4:58:15, 9.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 708/2500 [1:53:14<5:00:06, 10.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 709/2500 [1:53:24<4:57:53, 9.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 710/2500 [1:53:34<4:57:14, 9.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 711/2500 [1:53:44<4:53:52, 9.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 712/2500 [1:53:53<4:52:06, 9.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▊ | 713/2500 [1:54:04<4:56:08, 9.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▊ | 714/2500 [1:54:14<5:01:04, 10.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▊ | 715/2500 [1:54:25<5:03:31, 10.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", 
"Evaluating workflow: 29%|██▊ | 716/2500 [1:54:34<4:55:29, 9.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▊ | 717/2500 [1:54:43<4:51:32, 9.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▊ | 718/2500 [1:54:53<4:52:34, 9.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 719/2500 [1:55:04<4:54:57, 9.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 720/2500 [1:55:13<4:48:30, 9.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 721/2500 [1:55:23<4:55:57, 9.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 722/2500 [1:55:33<4:53:30, 9.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 723/2500 [1:55:42<4:48:20, 9.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 724/2500 [1:55:52<4:47:16, 
9.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 725/2500 [1:56:03<4:54:07, 9.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 726/2500 [1:56:13<4:56:08, 10.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 727/2500 [1:56:23<5:00:06, 10.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 728/2500 [1:56:33<4:58:47, 10.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 729/2500 [1:56:42<4:47:00, 9.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 730/2500 [1:56:51<4:39:34, 9.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 731/2500 [1:57:00<4:32:49, 9.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 732/2500 [1:57:09<4:34:15, 9.31s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 733/2500 [1:57:19<4:38:24, 9.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 734/2500 [1:57:29<4:41:46, 9.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 735/2500 [1:57:38<4:34:36, 9.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 736/2500 [1:57:46<4:27:42, 9.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 737/2500 [1:57:56<4:31:27, 9.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 738/2500 [1:58:03<4:18:23, 8.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 739/2500 [1:58:12<4:14:43, 8.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 740/2500 [1:58:21<4:22:58, 8.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": 
"stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 741/2500 [1:58:30<4:21:09, 8.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 742/2500 [1:58:38<4:14:02, 8.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 743/2500 [1:58:47<4:12:24, 8.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 744/2500 [1:58:55<4:04:43, 8.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 745/2500 [1:59:04<4:15:06, 8.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 746/2500 [1:59:13<4:18:24, 8.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 747/2500 [1:59:22<4:16:56, 8.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 748/2500 [1:59:31<4:22:20, 8.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating 
workflow: 30%|██▉ | 749/2500 [1:59:40<4:22:04, 8.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 750/2500 [1:59:49<4:14:52, 8.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 751/2500 [1:59:57<4:11:18, 8.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 752/2500 [2:00:06<4:14:15, 8.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 753/2500 [2:00:14<4:09:40, 8.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 754/2500 [2:00:22<4:05:18, 8.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 755/2500 [2:00:32<4:19:41, 8.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 756/2500 [2:00:40<4:07:38, 8.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 757/2500 [2:00:48<4:00:30, 8.28s/it]" ] }, { 
"name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 758/2500 [2:00:56<4:04:27, 8.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 759/2500 [2:01:04<4:01:03, 8.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 760/2500 [2:01:13<4:02:06, 8.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 761/2500 [2:01:21<3:59:22, 8.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 762/2500 [2:01:29<3:59:58, 8.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 763/2500 [2:01:38<4:05:30, 8.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 764/2500 [2:01:47<4:04:58, 8.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 765/2500 [2:01:55<4:04:44, 8.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics 
{'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 766/2500 [2:02:04<4:04:52, 8.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 767/2500 [2:02:12<4:04:31, 8.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 768/2500 [2:02:21<4:06:22, 8.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 769/2500 [2:02:29<4:04:32, 8.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 770/2500 [2:02:37<4:00:51, 8.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 771/2500 [2:02:45<3:59:12, 8.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 772/2500 [2:02:54<3:59:13, 8.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 773/2500 [2:03:04<4:15:52, 8.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 774/2500 [2:03:13<4:14:16, 8.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 775/2500 [2:03:21<4:12:05, 8.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 776/2500 [2:03:30<4:10:29, 8.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 777/2500 [2:03:38<4:08:47, 8.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 778/2500 [2:03:47<4:13:18, 8.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 779/2500 [2:03:56<4:11:06, 8.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 780/2500 [2:04:04<4:03:42, 8.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 781/2500 [2:04:12<4:00:39, 8.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
31%|███▏ | 782/2500 [2:04:21<4:00:51, 8.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███▏ | 783/2500 [2:04:30<4:08:20, 8.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███▏ | 784/2500 [2:04:39<4:08:10, 8.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███▏ | 785/2500 [2:04:47<4:04:37, 8.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███▏ | 786/2500 [2:04:55<4:04:25, 8.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███▏ | 787/2500 [2:05:04<4:04:09, 8.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 788/2500 [2:05:13<4:05:42, 8.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 789/2500 [2:05:22<4:07:59, 8.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 790/2500 [2:05:30<4:04:53, 8.59s/it]" ] }, { 
"name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 791/2500 [2:05:40<4:13:36, 8.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 792/2500 [2:05:49<4:16:52, 9.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 793/2500 [2:05:59<4:23:35, 9.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 794/2500 [2:06:07<4:18:21, 9.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 795/2500 [2:06:16<4:11:18, 8.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 796/2500 [2:06:26<4:22:35, 9.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 797/2500 [2:06:36<4:27:43, 9.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 798/2500 [2:06:46<4:33:07, 9.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics 
{'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 799/2500 [2:06:56<4:37:40, 9.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 800/2500 [2:07:05<4:30:05, 9.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 801/2500 [2:07:15<4:32:28, 9.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 802/2500 [2:07:24<4:28:34, 9.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 803/2500 [2:07:33<4:26:40, 9.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 804/2500 [2:07:43<4:26:54, 9.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 805/2500 [2:07:52<4:25:24, 9.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 806/2500 [2:08:01<4:26:31, 9.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 807/2500 [2:08:11<4:23:02, 9.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 808/2500 [2:08:20<4:27:26, 9.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 809/2500 [2:08:29<4:21:26, 9.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 810/2500 [2:08:39<4:25:52, 9.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 811/2500 [2:08:47<4:16:21, 9.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 812/2500 [2:08:57<4:20:38, 9.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 813/2500 [2:09:06<4:15:40, 9.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 814/2500 [2:09:14<4:10:57, 8.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating 
workflow: 33%|███▎ | 815/2500 [2:09:23<4:12:06, 8.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 816/2500 [2:09:32<4:08:42, 8.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 817/2500 [2:09:41<4:13:31, 9.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 818/2500 [2:09:50<4:07:51, 8.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 819/2500 [2:09:58<4:04:43, 8.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 820/2500 [2:10:07<4:01:23, 8.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 821/2500 [2:10:16<4:08:27, 8.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 822/2500 [2:10:26<4:13:17, 9.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 823/2500 [2:10:37<4:31:57, 9.73s/it]" ] }, 
{ "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 824/2500 [2:10:46<4:28:11, 9.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 825/2500 [2:10:54<4:17:16, 9.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 826/2500 [2:11:07<4:42:44, 10.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 827/2500 [2:11:15<4:30:54, 9.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 828/2500 [2:11:23<4:12:05, 9.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 829/2500 [2:11:31<4:07:06, 8.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 830/2500 [2:11:41<4:15:19, 9.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 831/2500 [2:11:50<4:08:12, 8.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 832/2500 [2:11:58<4:06:14, 8.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 833/2500 [2:12:06<3:59:52, 8.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 834/2500 [2:12:15<3:59:21, 8.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 835/2500 [2:12:24<4:01:21, 8.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 836/2500 [2:12:32<4:00:05, 8.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 837/2500 [2:12:41<4:02:51, 8.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▎ | 838/2500 [2:12:49<3:56:08, 8.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▎ | 839/2500 [2:12:58<3:57:22, 8.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": 
"stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▎ | 840/2500 [2:13:07<3:58:46, 8.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▎ | 841/2500 [2:13:16<4:03:21, 8.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▎ | 842/2500 [2:13:26<4:08:11, 8.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▎ | 843/2500 [2:13:35<4:15:46, 9.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 844/2500 [2:13:43<4:03:34, 8.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 845/2500 [2:13:52<4:05:08, 8.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 846/2500 [2:14:02<4:15:25, 9.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 847/2500 [2:14:12<4:20:47, 9.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", 
"Evaluating workflow: 34%|███▍ | 848/2500 [2:14:21<4:15:08, 9.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 849/2500 [2:14:29<4:07:18, 8.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 850/2500 [2:14:39<4:09:42, 9.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 851/2500 [2:14:56<5:17:59, 11.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 852/2500 [2:15:04<4:51:04, 10.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 853/2500 [2:15:13<4:31:58, 9.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 854/2500 [2:15:21<4:13:58, 9.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 855/2500 [2:15:29<4:04:56, 8.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 856/2500 
[2:15:36<3:52:43, 8.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 857/2500 [2:15:45<3:52:34, 8.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 858/2500 [2:15:53<3:49:36, 8.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 859/2500 [2:16:02<3:57:45, 8.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 860/2500 [2:16:11<3:58:58, 8.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 861/2500 [2:16:19<3:52:15, 8.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 862/2500 [2:16:28<3:53:44, 8.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 863/2500 [2:16:37<3:58:54, 8.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 864/2500 [2:16:45<3:54:03, 8.58s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 865/2500 [2:16:54<3:56:18, 8.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 866/2500 [2:17:02<3:53:42, 8.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 867/2500 [2:17:11<3:51:03, 8.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 868/2500 [2:17:20<3:56:55, 8.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 869/2500 [2:17:29<4:00:49, 8.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 870/2500 [2:17:38<4:00:36, 8.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 871/2500 [2:17:46<3:54:33, 8.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 872/2500 [2:17:56<4:03:32, 8.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 
0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 873/2500 [2:18:06<4:14:09, 9.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 874/2500 [2:18:15<4:12:26, 9.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 875/2500 [2:18:24<4:04:08, 9.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 876/2500 [2:18:33<4:04:01, 9.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 877/2500 [2:18:41<3:58:06, 8.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 878/2500 [2:18:50<3:56:52, 8.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 879/2500 [2:18:58<3:57:07, 8.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 880/2500 [2:19:07<3:56:10, 8.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 881/2500 [2:19:16<3:58:15, 8.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 882/2500 [2:19:25<3:56:59, 8.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 883/2500 [2:19:33<3:55:25, 8.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 884/2500 [2:19:42<3:57:20, 8.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 885/2500 [2:19:50<3:49:24, 8.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 886/2500 [2:20:00<4:00:41, 8.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 887/2500 [2:20:08<3:53:15, 8.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 888/2500 [2:20:16<3:49:40, 8.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 
889/2500 [2:20:26<3:54:41, 8.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 890/2500 [2:20:34<3:47:43, 8.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 891/2500 [2:20:42<3:45:53, 8.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 892/2500 [2:20:51<3:49:55, 8.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 893/2500 [2:20:59<3:48:27, 8.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 894/2500 [2:21:10<4:04:44, 9.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 895/2500 [2:21:19<4:03:29, 9.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 896/2500 [2:21:28<4:07:04, 9.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 897/2500 [2:21:37<4:03:00, 9.10s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 898/2500 [2:21:46<4:01:18, 9.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 899/2500 [2:21:55<3:58:54, 8.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 900/2500 [2:22:03<3:49:20, 8.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 901/2500 [2:22:16<4:26:34, 10.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 902/2500 [2:22:24<4:15:44, 9.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 903/2500 [2:22:33<4:09:03, 9.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 904/2500 [2:22:42<4:02:16, 9.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 905/2500 [2:22:50<3:54:15, 8.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 
0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 906/2500 [2:22:58<3:51:36, 8.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▋ | 907/2500 [2:23:08<3:59:29, 9.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▋ | 908/2500 [2:23:17<3:54:12, 8.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▋ | 909/2500 [2:23:24<3:46:36, 8.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▋ | 910/2500 [2:23:34<3:53:10, 8.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▋ | 911/2500 [2:23:43<3:59:06, 9.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▋ | 912/2500 [2:23:51<3:51:41, 8.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 913/2500 [2:24:00<3:48:43, 8.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 914/2500 [2:24:08<3:43:07, 8.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 915/2500 [2:24:16<3:40:36, 8.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 916/2500 [2:24:25<3:42:15, 8.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 917/2500 [2:24:36<4:02:44, 9.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 918/2500 [2:24:43<3:50:37, 8.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 919/2500 [2:24:52<3:48:30, 8.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 920/2500 [2:25:00<3:46:56, 8.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 921/2500 [2:25:08<3:42:31, 8.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating 
workflow: 37%|███▋ | 922/2500 [2:25:19<3:58:32, 9.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 923/2500 [2:25:28<3:59:32, 9.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 924/2500 [2:25:37<3:55:22, 8.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 925/2500 [2:25:48<4:16:34, 9.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 926/2500 [2:25:56<4:01:02, 9.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 927/2500 [2:26:06<4:02:41, 9.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 928/2500 [2:26:15<4:05:10, 9.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 929/2500 [2:26:24<3:57:32, 9.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 930/2500 [2:26:33<4:01:34, 9.23s/it]" ] 
}, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 931/2500 [2:26:42<3:58:32, 9.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 932/2500 [2:26:50<3:52:42, 8.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 933/2500 [2:26:58<3:45:22, 8.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 934/2500 [2:27:06<3:38:54, 8.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 935/2500 [2:27:14<3:36:26, 8.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 936/2500 [2:27:22<3:31:18, 8.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 937/2500 [2:27:32<3:45:06, 8.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 938/2500 [2:27:42<3:54:01, 8.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 939/2500 [2:27:51<3:53:47, 8.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 940/2500 [2:28:00<3:53:59, 9.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 941/2500 [2:28:09<3:54:21, 9.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 942/2500 [2:28:24<4:40:17, 10.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 943/2500 [2:28:32<4:23:33, 10.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 944/2500 [2:28:42<4:18:49, 9.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 945/2500 [2:28:51<4:08:27, 9.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 946/2500 [2:28:59<3:57:05, 9.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": 
"stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 947/2500 [2:29:07<3:48:38, 8.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 948/2500 [2:29:16<3:50:43, 8.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 949/2500 [2:29:25<3:51:41, 8.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 950/2500 [2:29:33<3:44:45, 8.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 951/2500 [2:29:41<3:35:44, 8.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 952/2500 [2:29:49<3:38:41, 8.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 953/2500 [2:29:58<3:39:33, 8.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 954/2500 [2:30:07<3:41:19, 8.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", 
"Evaluating workflow: 38%|███▊ | 955/2500 [2:30:15<3:41:49, 8.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 956/2500 [2:30:25<3:45:21, 8.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 957/2500 [2:30:33<3:44:33, 8.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 958/2500 [2:30:42<3:43:36, 8.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 959/2500 [2:30:50<3:41:43, 8.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 960/2500 [2:30:58<3:37:03, 8.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 961/2500 [2:31:06<3:32:02, 8.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 962/2500 [2:31:15<3:39:58, 8.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▊ | 963/2500 [2:31:24<3:38:45, 
8.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▊ | 964/2500 [2:31:34<3:46:57, 8.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▊ | 965/2500 [2:31:43<3:48:22, 8.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▊ | 966/2500 [2:31:52<3:49:42, 8.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▊ | 967/2500 [2:32:02<3:57:33, 9.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▊ | 968/2500 [2:32:10<3:50:04, 9.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 969/2500 [2:32:19<3:49:16, 8.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 970/2500 [2:32:30<4:02:25, 9.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 971/2500 [2:32:39<3:58:06, 9.34s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 972/2500 [2:32:47<3:49:59, 9.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 973/2500 [2:32:56<3:46:00, 8.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 974/2500 [2:33:06<3:54:13, 9.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 975/2500 [2:33:16<3:59:53, 9.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 976/2500 [2:33:26<4:08:39, 9.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 977/2500 [2:33:34<3:54:59, 9.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 978/2500 [2:33:44<4:02:16, 9.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 979/2500 [2:33:55<4:10:55, 9.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 980/2500 [2:34:04<4:02:44, 9.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 981/2500 [2:34:14<4:09:05, 9.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 982/2500 [2:34:23<3:57:50, 9.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 983/2500 [2:34:31<3:51:41, 9.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 984/2500 [2:34:40<3:48:29, 9.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 985/2500 [2:34:48<3:43:13, 8.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 986/2500 [2:34:57<3:38:56, 8.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 987/2500 [2:35:05<3:34:33, 8.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", 
"Evaluating workflow: 40%|███▉ | 988/2500 [2:35:13<3:28:00, 8.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 989/2500 [2:35:21<3:25:41, 8.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 990/2500 [2:35:30<3:35:17, 8.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 991/2500 [2:35:38<3:34:26, 8.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 992/2500 [2:35:47<3:33:43, 8.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 993/2500 [2:35:56<3:37:50, 8.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 994/2500 [2:36:06<3:45:01, 8.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 995/2500 [2:36:16<3:56:46, 9.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 996/2500 [2:36:26<3:58:33, 
9.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 997/2500 [2:36:36<4:03:02, 9.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 998/2500 [2:36:44<3:52:34, 9.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 999/2500 [2:36:53<3:50:42, 9.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1000/2500 [2:37:02<3:48:24, 9.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1001/2500 [2:37:12<3:49:50, 9.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1002/2500 [2:37:21<3:48:43, 9.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1003/2500 [2:37:29<3:38:11, 8.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1004/2500 [2:37:37<3:39:36, 8.81s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1005/2500 [2:37:47<3:41:46, 8.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1006/2500 [2:37:55<3:38:09, 8.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1007/2500 [2:38:04<3:35:57, 8.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1008/2500 [2:38:13<3:38:22, 8.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1009/2500 [2:38:21<3:37:53, 8.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1010/2500 [2:38:31<3:44:42, 9.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1011/2500 [2:38:40<3:44:34, 9.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1012/2500 [2:38:49<3:46:08, 9.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 
'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1013/2500 [2:38:59<3:46:55, 9.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1014/2500 [2:39:07<3:41:27, 8.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1015/2500 [2:39:17<3:47:05, 9.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1016/2500 [2:39:26<3:45:41, 9.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1017/2500 [2:39:34<3:41:09, 8.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1018/2500 [2:39:43<3:39:07, 8.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1019/2500 [2:39:53<3:48:45, 9.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1020/2500 [2:40:02<3:45:44, 9.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1021/2500 [2:40:12<3:50:38, 9.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1022/2500 [2:40:21<3:49:12, 9.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1023/2500 [2:40:36<4:33:07, 11.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1024/2500 [2:40:46<4:19:09, 10.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1025/2500 [2:40:54<4:03:17, 9.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1026/2500 [2:41:04<4:02:27, 9.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1027/2500 [2:41:14<4:05:47, 10.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1028/2500 [2:41:23<3:57:29, 9.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
41%|████ | 1029/2500 [2:41:33<3:58:22, 9.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1030/2500 [2:41:44<4:07:27, 10.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1031/2500 [2:41:55<4:13:53, 10.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████▏ | 1032/2500 [2:42:05<4:11:23, 10.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████▏ | 1033/2500 [2:42:14<4:03:41, 9.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████▏ | 1034/2500 [2:42:23<3:53:43, 9.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████▏ | 1035/2500 [2:42:31<3:45:53, 9.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████▏ | 1036/2500 [2:42:40<3:42:18, 9.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████▏ | 1037/2500 [2:42:50<3:49:23, 
9.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1038/2500 [2:43:00<3:54:21, 9.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1039/2500 [2:43:11<3:59:34, 9.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1040/2500 [2:43:21<4:01:12, 9.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1041/2500 [2:43:30<3:56:19, 9.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1042/2500 [2:43:40<3:57:50, 9.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1043/2500 [2:43:51<4:03:44, 10.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1044/2500 [2:44:01<4:08:14, 10.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1045/2500 [2:44:10<3:57:58, 9.81s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1046/2500 [2:44:30<5:09:26, 12.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1047/2500 [2:44:39<4:45:44, 11.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1048/2500 [2:44:50<4:39:44, 11.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1049/2500 [2:44:59<4:15:52, 10.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1050/2500 [2:45:07<4:00:24, 9.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1051/2500 [2:45:17<3:57:19, 9.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1052/2500 [2:45:27<4:04:31, 10.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1053/2500 [2:45:37<4:03:10, 10.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1054/2500 [2:45:47<3:58:41, 9.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1055/2500 [2:45:57<3:57:10, 9.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1056/2500 [2:46:05<3:49:57, 9.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1057/2500 [2:46:15<3:53:02, 9.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1058/2500 [2:46:25<3:52:14, 9.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1059/2500 [2:46:35<3:51:09, 9.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1060/2500 [2:46:45<3:55:48, 9.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1061/2500 [2:46:53<3:42:47, 9.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" 
] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1062/2500 [2:47:02<3:41:44, 9.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1063/2500 [2:47:11<3:35:50, 9.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1064/2500 [2:47:18<3:24:33, 8.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1065/2500 [2:47:27<3:29:45, 8.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1066/2500 [2:47:35<3:22:58, 8.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1067/2500 [2:47:44<3:26:27, 8.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1068/2500 [2:47:55<3:42:41, 9.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1069/2500 [2:48:06<3:50:37, 9.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1070/2500 [2:48:13<3:36:51, 9.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1071/2500 [2:48:22<3:33:08, 8.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1072/2500 [2:48:29<3:22:44, 8.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1073/2500 [2:48:38<3:20:31, 8.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1074/2500 [2:48:47<3:28:22, 8.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1075/2500 [2:48:56<3:30:31, 8.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1076/2500 [2:49:09<3:56:45, 9.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1077/2500 [2:49:18<3:51:49, 9.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating 
workflow: 43%|████▎ | 1078/2500 [2:49:27<3:46:06, 9.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1079/2500 [2:49:36<3:37:56, 9.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1080/2500 [2:49:44<3:34:32, 9.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1081/2500 [2:49:53<3:34:21, 9.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1082/2500 [2:50:03<3:39:59, 9.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1083/2500 [2:50:11<3:31:36, 8.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1084/2500 [2:50:21<3:32:55, 9.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1085/2500 [2:50:28<3:24:37, 8.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1086/2500 
[2:50:37<3:22:11, 8.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1087/2500 [2:50:47<3:30:47, 8.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▎ | 1088/2500 [2:50:57<3:42:41, 9.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▎ | 1089/2500 [2:51:06<3:40:36, 9.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▎ | 1090/2500 [2:51:15<3:33:00, 9.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▎ | 1091/2500 [2:51:23<3:29:50, 8.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▎ | 1092/2500 [2:51:32<3:29:11, 8.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▎ | 1093/2500 [2:51:41<3:29:12, 8.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1094/2500 [2:51:51<3:31:47, 9.04s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1095/2500 [2:51:58<3:19:18, 8.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1096/2500 [2:52:06<3:19:09, 8.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1097/2500 [2:52:14<3:16:08, 8.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1098/2500 [2:52:23<3:17:20, 8.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1099/2500 [2:52:32<3:18:42, 8.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1100/2500 [2:52:40<3:14:12, 8.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1101/2500 [2:52:48<3:16:25, 8.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1102/2500 [2:52:56<3:12:04, 8.24s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1103/2500 [2:53:10<3:50:46, 9.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1104/2500 [2:53:18<3:37:05, 9.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1105/2500 [2:53:27<3:36:59, 9.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1106/2500 [2:53:38<3:45:55, 9.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1107/2500 [2:53:47<3:41:25, 9.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1108/2500 [2:53:56<3:41:25, 9.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1109/2500 [2:54:07<3:51:22, 9.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1110/2500 [2:54:19<4:00:36, 10.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 
0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1111/2500 [2:54:31<4:14:04, 10.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1112/2500 [2:54:40<4:00:36, 10.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1113/2500 [2:54:51<4:01:30, 10.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1114/2500 [2:54:59<3:46:17, 9.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1115/2500 [2:55:10<3:55:50, 10.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1116/2500 [2:55:19<3:44:53, 9.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1117/2500 [2:55:30<3:54:59, 10.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1118/2500 [2:55:39<3:46:49, 9.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": 
"stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1119/2500 [2:55:50<3:52:59, 10.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1120/2500 [2:56:01<4:00:48, 10.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1121/2500 [2:56:12<4:00:17, 10.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1122/2500 [2:56:20<3:48:41, 9.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1123/2500 [2:56:29<3:38:30, 9.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1124/2500 [2:56:37<3:29:00, 9.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1125/2500 [2:56:45<3:23:57, 8.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1126/2500 [2:56:54<3:19:37, 8.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 45%|████▌ | 1127/2500 [2:57:02<3:15:34, 8.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1128/2500 [2:57:12<3:28:33, 9.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1129/2500 [2:57:21<3:27:53, 9.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1130/2500 [2:57:29<3:19:45, 8.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1131/2500 [2:57:38<3:22:26, 8.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1132/2500 [2:57:47<3:20:14, 8.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1133/2500 [2:57:56<3:23:04, 8.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1134/2500 [2:58:05<3:20:16, 8.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
45%|████▌ | 1135/2500 [2:58:13<3:14:12, 8.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1136/2500 [2:58:21<3:11:29, 8.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1137/2500 [2:58:29<3:09:52, 8.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1138/2500 [2:58:37<3:08:35, 8.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1139/2500 [2:58:46<3:09:38, 8.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1140/2500 [2:58:54<3:08:14, 8.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1141/2500 [2:59:02<3:08:02, 8.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1142/2500 [2:59:13<3:22:25, 8.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1143/2500 [2:59:34<4:44:47, 
12.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1144/2500 [2:59:43<4:20:47, 11.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1145/2500 [2:59:51<3:59:04, 10.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1146/2500 [2:59:59<3:41:53, 9.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1147/2500 [3:00:09<3:39:53, 9.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1148/2500 [3:00:18<3:36:56, 9.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1149/2500 [3:00:27<3:29:30, 9.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1150/2500 [3:00:35<3:21:27, 8.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1151/2500 [3:00:44<3:24:06, 9.08s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1152/2500 [3:00:53<3:19:35, 8.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1153/2500 [3:01:02<3:19:32, 8.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1154/2500 [3:01:11<3:21:27, 8.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1155/2500 [3:01:21<3:27:47, 9.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1156/2500 [3:01:30<3:28:06, 9.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▋ | 1157/2500 [3:01:39<3:28:07, 9.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▋ | 1158/2500 [3:01:48<3:23:05, 9.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▋ | 1159/2500 [3:01:56<3:14:45, 8.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics 
{'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▋ | 1160/2500 [3:02:04<3:11:08, 8.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▋ | 1161/2500 [3:02:14<3:17:50, 8.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▋ | 1162/2500 [3:02:22<3:14:55, 8.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1163/2500 [3:02:31<3:14:39, 8.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1164/2500 [3:02:39<3:10:11, 8.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1165/2500 [3:02:47<3:08:07, 8.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1166/2500 [3:02:57<3:17:54, 8.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1167/2500 [3:03:05<3:13:55, 8.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1168/2500 [3:03:16<3:26:25, 9.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1169/2500 [3:03:25<3:25:07, 9.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1170/2500 [3:03:34<3:23:44, 9.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1171/2500 [3:03:43<3:23:05, 9.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1172/2500 [3:03:51<3:16:09, 8.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971675.938504051)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971695.329140562)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971686.32786291)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 971704.566293768)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971713.898366054)])']\n", "connector: \n", 
"Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971723.235282969)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971731.316192421)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 971743.306843347)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971752.301813725)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971762.401971096)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971774.447620266)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971783.237257586)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971793.503744343)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971823.966426447)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971803.360962645)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 971813.52797019)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971843.842964144)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971853.16730515)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971834.155376523)])']\n", 
"connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971873.568346952)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971883.743337813)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971894.126770096)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971902.748782397)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971912.195013319)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 971922.50849004)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 971932.159547856)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971970.335210271)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971950.69387968)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 971960.374408926)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971979.322806072)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971998.324707626)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972007.584817625)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", 
"Unclosed connector\n", "connections: ['deque([(, 971988.831495421)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972039.024037557)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972049.620727965)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972017.313336955)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972028.224669006)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972068.526579411)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972077.008574946)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972087.700931769)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972059.410054333)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972097.159408817)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972105.683511758)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972114.373862359)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 972125.988985525)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972134.632286383)])']\n", "connector: \n", "Unclosed client session\n", 
"client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972142.848360375)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972152.388993157)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972179.718784085)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972190.260243534)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972170.767725241)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972200.1782333)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972228.727717055)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972209.295193097)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 972218.108331436)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972242.321863866)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 972252.506973573)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972271.227831472)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972261.948075504)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: 
['deque([(, 972289.870208077)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972280.43972616)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 972299.401216035)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 972309.114346761)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972318.354134638)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972329.460787146)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972338.731898197)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 972349.835358008)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972380.013465027)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972359.913915896)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 972369.8249429)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 972389.219226905)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972398.649743145)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972407.667521528)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", 
"connections: ['deque([(, 972416.456927718)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972426.500384111)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972435.206478152)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 972444.425390281)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 972454.692567508)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972481.751887682)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972491.861254676)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972463.525631733)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 972473.111435888)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972502.039235428)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972532.134802155)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972510.789708865)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 972522.7450709)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972542.252823149)])']\n", "connector: \n", 
"Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972551.326946655)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972561.156992068)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 972571.109934655)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972583.292224038)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972592.835626383)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972602.195354168)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972612.308809846)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972621.530429191)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972641.476542231)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972631.00322262)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972650.396879579)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972668.807455601)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972678.798160256)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972659.319325641)])']\n", 
"connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972688.726295288)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972709.007792743)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972718.215337449)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972698.492053104)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972728.381385292)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972750.072502109)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972759.434461652)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972739.239144063)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972769.533600827)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972800.476448087)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972779.597742605)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 972790.177806905)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972825.441556731)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", 
"Unclosed connector\n", "connections: ['deque([(, 972835.668247667)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972809.610753754)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972846.045445742)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972855.854929338)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972864.801130209)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972875.103969043)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972904.315031644)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972914.554360346)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972884.652457422)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 972894.43774776)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972924.670481109)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972934.430696468)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972944.679111704)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 972954.878453588)])']\n", "connector: \n", "Unclosed client session\n", 
"client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972965.65419546)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972975.157748191)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 972983.634940058)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 972993.265885302)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973004.147169115)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973013.879854444)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 973023.721247043)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973032.863048698)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973042.509667604)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973052.67304421)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 973063.3324233)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973082.063254305)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 973091.645574666)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
973101.055751958)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973072.339275419)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973123.885902837)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973133.11831196)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973141.819596349)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973111.14994747)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973150.926693671)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973160.121941441)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973169.608403214)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973180.855979707)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973209.201743646)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973190.38351931)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 973199.170966654)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973238.735598446)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", 
"connections: ['deque([(, 973248.138929853)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973229.318350565)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 973258.915297178)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973278.419866876)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973287.76045579)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973269.109812967)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973306.858752811)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973316.895495011)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973296.181297022)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 973327.090976527)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973346.816491683)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973364.870704615)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973337.142733658)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 973374.494475481)])']\n", "connector: \n", 
"Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973384.360052247)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973395.285117508)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973404.261903786)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973414.40205837)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973433.556130702)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973424.116607537)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 973443.270570792)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973461.890247981)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973470.619250056)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973480.44348527)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973453.525267624)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973490.120416326)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973509.348087771)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973519.503886304)])']\n", 
"connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973499.404346232)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973537.886350251)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973546.677632994)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973555.286257063)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973528.704649641)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973574.925822597)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973585.502776102)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973595.913925809)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973564.27013112)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973605.53755281)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973614.044588589)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 973622.838585947)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 971863.276392324)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 971941.998171714)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 972162.157739947)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 973218.474422935)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 973634.582299247)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973654.5144155)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973674.540874434)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973644.651473666)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 973664.72096789)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973694.072024887)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973703.749481331)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973714.021313714)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973684.466722244)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973724.534709644)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973734.94311539)])']\n", "connector: \n", "Unclosed connector\n", "connections: 
['deque([(, 973753.777652339)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973744.264659783)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973773.85949447)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973783.090168408)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973763.721890095)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973793.671496459)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973803.395324353)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973812.738269641)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973822.370937483)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973832.867192162)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973853.538676326)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973863.564284506)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973843.055948753)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973872.369389211)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client 
session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973881.272581322)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 973890.004382381)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 973899.438014255)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973919.085737353)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973927.864180015)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973936.435798766)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973909.232663236)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973945.983371632)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973953.755594413)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973962.155674317)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973971.787177011)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973988.677957964)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973997.17970362)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 973980.562877197)])']\n", 
"connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974004.940472587)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974014.501892624)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974032.304190298)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974023.61559309)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974041.732508816)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974058.877341612)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974067.225356869)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974050.703850448)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974084.419935656)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974092.510949915)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974076.200257433)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 974102.60522899)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974110.170118693)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed 
client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974117.886997924)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 974126.63592922)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 974134.680907837)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974168.456073099)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974143.124762176)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 974151.174936943)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 974159.519161225)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974185.349090585)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974193.843194884)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974202.29268278)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974176.892572665)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974219.329054054)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974227.395113703)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974210.989481733)])']\n", "connector: \n", "Unclosed client session\n", 
"client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974235.573465005)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974243.892100463)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974262.864595013)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974254.143922256)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974280.068385529)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974271.468937785)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 974288.606323082)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 974297.811661985)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974306.398497586)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974314.308791344)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 974322.47313267)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974330.911717823)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974340.211243767)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974348.886305262)])']\n", "connector: \n", "Unclosed 
client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974357.16697186)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974374.2604633)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974365.718179873)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974383.010013409)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 974391.905420006)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974400.255841456)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974409.884745806)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974429.017941863)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974419.189103597)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974445.964948278)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974456.149699931)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974466.017191071)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974437.687012302)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", 
"connections: ['deque([(, 974476.103310267)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974486.28441186)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974495.207022382)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974505.039194388)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974523.505442486)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974514.221334018)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 974532.981294323)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974542.265413304)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974551.810737402)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974570.719062046)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974560.857779602)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974589.330009151)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974597.662057029)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974607.295139196)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", 
"Unclosed connector\n", "connections: ['deque([(, 974579.511990021)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974615.9884775)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974633.625090144)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974624.539380442)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 974642.216591483)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974651.666940401)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974660.049293735)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 974668.535964622)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974676.890901546)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974686.372320739)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974707.144180275)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974695.84323086)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974716.444376281)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client 
session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974724.760894156)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 974737.039034944)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 974745.778348233)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974771.605618546)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974753.261864732)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 974761.730079619)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 974779.94411534)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974788.648265392)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974796.759560299)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 974805.348701307)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974814.226177096)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974822.789648799)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974831.795716588)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974839.767916188)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
974848.459090807)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974875.822550389)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974857.219321298)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 974866.41982893)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974885.737515067)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974912.724188843)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974893.543574319)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 974902.576157357)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974922.659227428)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974931.459560309)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974939.795293405)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974949.09225457)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974966.472594875)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
974974.798894834)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 974983.099927048)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974999.017570226)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975006.483188355)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975014.976171481)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 974990.839425853)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975032.526084402)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975041.384611601)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975049.326913491)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 975023.124799586)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975075.418916165)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975058.026895413)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 975067.237068913)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 975084.296412418)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975092.667962197)])']\n", 
"connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975110.16900724)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975100.942181048)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975119.374871278)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975128.225534006)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975136.357082005)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975146.117200269)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975156.417710355)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975182.929972261)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975165.598807867)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 975173.911837492)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975191.235223815)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975199.904133552)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975208.715832491)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", 
"Unclosed connector\n", "connections: ['deque([(, 975217.393133733)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975235.106518078)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975243.719269992)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975226.416557828)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 975252.70971665)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975260.557396668)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975270.495620916)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975286.790286404)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975278.541087029)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975321.0921951)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975295.979621063)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 975303.872798024)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 975312.148940265)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975340.082702227)])']\n", "connector: \n", "Unclosed client session\n", 
"client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975329.507198193)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975349.089080837)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975367.411252275)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975376.314652482)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975358.657326014)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 975385.071140678)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975392.847838891)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975414.789693529)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 975423.575199136)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975406.122304617)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975440.224866128)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975448.722466508)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975432.101419294)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975458.447827874)])']\n", "connector: \n", 
"Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975466.823811754)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975474.713180866)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975484.103216623)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975501.780865564)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975510.179497789)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975518.139161238)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975493.666620088)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975534.857181094)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975526.280529793)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 975545.882323262)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 975553.570747384)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975562.06797911)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975570.558954107)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 975578.636827632)])']\n", 
"connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975589.140330556)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975598.355137228)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975606.960682813)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975618.632237334)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975626.452838253)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975635.871759246)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975645.464644521)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 975653.869560486)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975663.475394732)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975680.737160474)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975672.34065953)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975688.725842424)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975696.546525047)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", 
"Unclosed connector\n", "connections: ['deque([(, 975712.296518756)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975704.637273295)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975731.987503431)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975740.967143273)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975722.185366723)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 975749.996454094)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975773.997816438)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975782.66667194)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975759.063053931)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975800.903715395)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975809.048779272)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975792.236102212)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 975817.133957731)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975835.319326709)])']\n", "connector: \n", "Unclosed client session\n", 
"client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975826.2560773)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 975843.406183797)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 975850.962380978)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975868.324693524)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975877.086925245)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975859.718565147)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 975885.759867528)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975894.851637638)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 975903.523421248)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 975912.150509202)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 975920.625780934)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975936.494750476)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975945.810150638)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: 
['deque([(, 975928.671202434)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975954.2534486)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975963.878291645)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975982.068793528)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975972.948856239)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 975992.096757212)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976020.088566245)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976000.43683617)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976009.364302333)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976029.051547968)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976045.882510389)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976037.352733413)])']\n", "connector: \n", "Evaluating workflow: 47%|████▋ | 1173/2500 [3:04:02<3:29:29, 9.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1174/2500 [3:04:11<3:23:30, 9.21s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1175/2500 [3:04:20<3:23:05, 9.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1176/2500 [3:04:29<3:19:56, 9.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1177/2500 [3:04:37<3:16:48, 8.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1178/2500 [3:04:46<3:12:35, 8.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1179/2500 [3:04:55<3:12:26, 8.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1180/2500 [3:05:04<3:15:25, 8.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1181/2500 [3:05:13<3:18:38, 9.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1182/2500 [3:05:27<3:48:46, 10.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 
0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1183/2500 [3:05:39<3:58:42, 10.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1184/2500 [3:05:48<3:46:16, 10.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1185/2500 [3:05:57<3:39:50, 10.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1186/2500 [3:06:06<3:33:13, 9.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1187/2500 [3:06:15<3:29:52, 9.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1188/2500 [3:06:24<3:22:41, 9.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1189/2500 [3:06:35<3:32:23, 9.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1190/2500 [3:06:45<3:34:54, 9.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1191/2500 [3:06:54<3:27:25, 9.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1192/2500 [3:07:02<3:22:42, 9.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1193/2500 [3:07:13<3:29:57, 9.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1194/2500 [3:07:22<3:29:43, 9.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1195/2500 [3:07:32<3:26:01, 9.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1196/2500 [3:07:40<3:21:47, 9.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1197/2500 [3:07:52<3:39:51, 10.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1198/2500 [3:08:04<3:48:18, 10.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 48%|████▊ | 1199/2500 [3:08:11<3:28:11, 9.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1200/2500 [3:08:22<3:35:21, 9.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1201/2500 [3:08:31<3:25:28, 9.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1202/2500 [3:08:40<3:23:59, 9.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1203/2500 [3:08:50<3:26:11, 9.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1204/2500 [3:08:58<3:18:54, 9.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1205/2500 [3:09:07<3:17:39, 9.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1206/2500 [3:09:16<3:16:08, 9.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 
1207/2500 [3:09:26<3:21:12, 9.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1208/2500 [3:09:34<3:13:23, 8.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1209/2500 [3:09:44<3:16:42, 9.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1210/2500 [3:09:53<3:19:15, 9.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1211/2500 [3:10:02<3:15:42, 9.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1212/2500 [3:10:10<3:10:38, 8.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▊ | 1213/2500 [3:10:20<3:13:20, 9.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▊ | 1214/2500 [3:10:28<3:11:37, 8.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▊ | 1215/2500 [3:10:37<3:09:44, 8.86s/it]" ] }, { 
"name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▊ | 1216/2500 [3:10:46<3:08:48, 8.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▊ | 1217/2500 [3:10:54<3:08:08, 8.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▊ | 1218/2500 [3:11:02<3:01:40, 8.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1219/2500 [3:11:11<3:04:21, 8.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1220/2500 [3:11:22<3:16:07, 9.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1221/2500 [3:11:30<3:12:54, 9.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1222/2500 [3:11:40<3:15:40, 9.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1223/2500 [3:11:49<3:15:44, 9.20s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1224/2500 [3:11:58<3:13:47, 9.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1225/2500 [3:12:05<3:01:00, 8.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1226/2500 [3:12:14<3:02:59, 8.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1227/2500 [3:12:23<3:06:46, 8.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1228/2500 [3:12:32<3:04:37, 8.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1229/2500 [3:12:44<3:28:22, 9.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1230/2500 [3:12:52<3:12:01, 9.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1231/2500 [3:13:00<3:06:38, 8.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1232/2500 [3:13:09<3:10:05, 8.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1233/2500 [3:13:19<3:12:26, 9.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1234/2500 [3:13:27<3:07:02, 8.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1235/2500 [3:13:36<3:09:21, 8.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1236/2500 [3:13:48<3:24:47, 9.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1237/2500 [3:13:56<3:18:53, 9.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1238/2500 [3:14:05<3:13:39, 9.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1239/2500 [3:14:13<3:05:20, 8.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1240/2500 [3:14:21<2:59:10, 8.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1241/2500 [3:14:29<2:58:28, 8.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1242/2500 [3:14:37<2:56:13, 8.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1243/2500 [3:14:47<3:02:15, 8.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1244/2500 [3:14:55<3:02:00, 8.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1245/2500 [3:15:05<3:04:31, 8.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1246/2500 [3:15:13<3:03:06, 8.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1247/2500 [3:15:22<3:01:33, 8.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 50%|████▉ | 1248/2500 [3:15:30<2:58:13, 8.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1249/2500 [3:15:39<3:03:53, 8.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1250/2500 [3:15:49<3:05:29, 8.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1251/2500 [3:15:57<3:05:05, 8.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1252/2500 [3:16:08<3:16:49, 9.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1253/2500 [3:16:18<3:21:52, 9.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1254/2500 [3:16:27<3:14:29, 9.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1255/2500 [3:16:37<3:16:53, 9.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 
1256/2500 [3:16:47<3:19:44, 9.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1257/2500 [3:16:57<3:23:24, 9.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1258/2500 [3:17:07<3:22:27, 9.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1259/2500 [3:17:16<3:17:03, 9.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1260/2500 [3:17:24<3:09:39, 9.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1261/2500 [3:17:33<3:10:48, 9.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1262/2500 [3:17:43<3:16:00, 9.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1263/2500 [3:17:53<3:12:56, 9.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1264/2500 [3:18:01<3:06:28, 9.05s/it]" ] }, { 
"name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1265/2500 [3:18:09<3:01:16, 8.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1266/2500 [3:18:19<3:10:09, 9.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1267/2500 [3:18:28<3:07:41, 9.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1268/2500 [3:18:37<3:06:05, 9.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1269/2500 [3:18:46<3:05:55, 9.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1270/2500 [3:18:54<2:59:00, 8.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1271/2500 [3:19:03<2:58:29, 8.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1272/2500 [3:19:12<3:03:47, 8.98s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1273/2500 [3:19:23<3:10:46, 9.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1274/2500 [3:19:31<3:05:55, 9.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1275/2500 [3:19:41<3:12:39, 9.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1276/2500 [3:19:51<3:13:51, 9.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1277/2500 [3:20:01<3:18:07, 9.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1278/2500 [3:20:09<3:08:16, 9.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1279/2500 [3:20:21<3:24:24, 10.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1280/2500 [3:20:30<3:13:35, 9.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1281/2500 [3:20:40<3:18:44, 9.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████▏ | 1282/2500 [3:20:50<3:18:47, 9.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████▏ | 1283/2500 [3:20:59<3:13:01, 9.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████▏ | 1284/2500 [3:21:08<3:12:43, 9.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████▏ | 1285/2500 [3:21:16<3:04:43, 9.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████▏ | 1286/2500 [3:21:26<3:08:36, 9.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████▏ | 1287/2500 [3:21:37<3:15:37, 9.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1288/2500 [3:21:47<3:16:49, 9.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": 
"stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1289/2500 [3:21:55<3:11:31, 9.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1290/2500 [3:22:06<3:15:03, 9.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1291/2500 [3:22:14<3:08:09, 9.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1292/2500 [3:22:22<3:01:24, 9.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1293/2500 [3:22:31<3:00:52, 8.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1294/2500 [3:22:41<3:05:46, 9.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1295/2500 [3:22:49<2:58:47, 8.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1296/2500 [3:22:58<3:00:34, 9.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1297/2500 [3:23:07<2:56:35, 8.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1298/2500 [3:23:22<3:34:06, 10.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1299/2500 [3:23:32<3:28:46, 10.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1300/2500 [3:23:40<3:14:03, 9.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1301/2500 [3:23:49<3:11:17, 9.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1302/2500 [3:23:59<3:11:18, 9.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1303/2500 [3:24:07<3:06:11, 9.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1304/2500 [3:24:16<3:04:18, 9.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", 
"Evaluating workflow: 52%|█████▏ | 1305/2500 [3:24:26<3:04:49, 9.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1306/2500 [3:24:35<3:03:28, 9.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1307/2500 [3:24:44<3:04:47, 9.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1308/2500 [3:24:53<3:02:14, 9.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1309/2500 [3:25:02<3:01:05, 9.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1310/2500 [3:25:12<3:04:52, 9.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1311/2500 [3:25:21<3:04:33, 9.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1312/2500 [3:25:34<3:22:19, 10.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ 
| 1313/2500 [3:25:42<3:13:41, 9.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1314/2500 [3:25:52<3:14:04, 9.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1315/2500 [3:26:01<3:09:35, 9.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1316/2500 [3:26:11<3:11:25, 9.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1317/2500 [3:26:21<3:13:59, 9.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1318/2500 [3:26:31<3:10:35, 9.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1319/2500 [3:26:41<3:12:40, 9.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1320/2500 [3:26:50<3:08:55, 9.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1321/2500 [3:27:00<3:10:19, 
9.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1322/2500 [3:27:08<3:02:20, 9.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1323/2500 [3:27:18<3:06:09, 9.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1324/2500 [3:27:27<2:59:40, 9.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1325/2500 [3:27:36<3:01:39, 9.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1326/2500 [3:27:45<2:58:49, 9.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1327/2500 [3:27:54<3:00:51, 9.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1328/2500 [3:28:04<3:00:06, 9.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1329/2500 [3:28:13<2:59:09, 9.18s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1330/2500 [3:28:23<3:02:51, 9.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1331/2500 [3:28:32<3:03:00, 9.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1332/2500 [3:28:40<2:51:51, 8.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1333/2500 [3:28:50<3:01:08, 9.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1334/2500 [3:28:59<2:58:56, 9.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1335/2500 [3:29:09<3:02:42, 9.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1336/2500 [3:29:17<2:56:26, 9.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1337/2500 [3:29:27<3:00:44, 9.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▎ | 1338/2500 [3:29:36<2:58:02, 9.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▎ | 1339/2500 [3:29:49<3:17:52, 10.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▎ | 1340/2500 [3:29:58<3:10:39, 9.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▎ | 1341/2500 [3:30:06<3:02:32, 9.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▎ | 1342/2500 [3:30:16<3:02:37, 9.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▎ | 1343/2500 [3:30:25<3:04:56, 9.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1344/2500 [3:30:35<3:05:16, 9.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1345/2500 [3:30:46<3:12:08, 9.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1346/2500 [3:30:55<3:06:11, 9.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1347/2500 [3:31:04<3:05:22, 9.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1348/2500 [3:31:16<3:14:34, 10.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1349/2500 [3:31:24<3:04:50, 9.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1350/2500 [3:31:34<3:07:32, 9.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1351/2500 [3:31:45<3:10:25, 9.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1352/2500 [3:31:54<3:08:42, 9.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1353/2500 [3:32:03<3:01:14, 9.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1354/2500 [3:32:12<2:56:25, 9.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1355/2500 [3:32:20<2:51:20, 8.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1356/2500 [3:32:30<2:58:24, 9.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1357/2500 [3:32:41<3:07:56, 9.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1358/2500 [3:32:50<3:01:21, 9.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1359/2500 [3:32:59<2:58:25, 9.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1360/2500 [3:33:09<2:58:49, 9.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1361/2500 [3:33:19<3:03:05, 9.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 54%|█████▍ | 1362/2500 [3:33:27<2:55:05, 9.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1363/2500 [3:33:35<2:49:35, 8.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1364/2500 [3:33:46<3:02:05, 9.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1365/2500 [3:33:58<3:15:34, 10.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1366/2500 [3:34:08<3:10:31, 10.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1367/2500 [3:34:17<3:04:12, 9.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1368/2500 [3:34:27<3:03:37, 9.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1369/2500 [3:34:37<3:04:56, 9.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
55%|█████▍ | 1370/2500 [3:34:45<2:56:01, 9.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1371/2500 [3:34:54<2:55:54, 9.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1372/2500 [3:35:03<2:53:05, 9.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1373/2500 [3:35:14<3:01:33, 9.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1374/2500 [3:35:23<2:57:43, 9.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1375/2500 [3:35:32<2:57:09, 9.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1376/2500 [3:35:42<3:01:16, 9.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1377/2500 [3:35:54<3:12:58, 10.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1378/2500 
[3:36:04<3:07:44, 10.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1379/2500 [3:36:14<3:08:27, 10.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1380/2500 [3:36:23<3:04:01, 9.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1381/2500 [3:36:34<3:09:35, 10.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1382/2500 [3:36:44<3:07:04, 10.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1383/2500 [3:36:53<3:01:47, 9.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1384/2500 [3:37:03<3:02:31, 9.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1385/2500 [3:37:13<3:05:12, 9.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1386/2500 [3:37:23<3:05:27, 9.99s/it]" ] }, 
{ "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1387/2500 [3:37:35<3:17:13, 10.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1388/2500 [3:37:46<3:15:31, 10.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1389/2500 [3:37:56<3:13:01, 10.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1390/2500 [3:38:09<3:26:14, 11.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1391/2500 [3:38:19<3:20:43, 10.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1392/2500 [3:38:27<3:07:56, 10.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1393/2500 [3:38:38<3:09:42, 10.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1394/2500 [3:38:47<3:04:29, 10.01s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1395/2500 [3:38:58<3:06:20, 10.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1396/2500 [3:39:07<3:00:05, 9.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1397/2500 [3:39:17<3:01:09, 9.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1398/2500 [3:39:27<3:02:49, 9.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1399/2500 [3:39:40<3:17:41, 10.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1400/2500 [3:39:49<3:09:17, 10.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1401/2500 [3:40:01<3:19:51, 10.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1402/2500 [3:40:11<3:15:32, 10.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": 
[ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1403/2500 [3:40:23<3:22:41, 11.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1404/2500 [3:40:33<3:12:45, 10.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1405/2500 [3:40:43<3:09:01, 10.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1406/2500 [3:40:54<3:17:20, 10.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▋ | 1407/2500 [3:41:05<3:13:20, 10.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▋ | 1408/2500 [3:41:16<3:19:13, 10.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▋ | 1409/2500 [3:41:29<3:26:50, 11.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▋ | 1410/2500 [3:41:40<3:23:29, 11.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 
0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▋ | 1411/2500 [3:41:51<3:22:54, 11.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▋ | 1412/2500 [3:42:04<3:32:48, 11.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1413/2500 [3:42:15<3:32:09, 11.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1414/2500 [3:42:25<3:21:58, 11.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1415/2500 [3:42:35<3:16:17, 10.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1416/2500 [3:42:48<3:27:17, 11.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1417/2500 [3:42:58<3:18:49, 11.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1418/2500 [3:43:07<3:07:09, 10.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1419/2500 [3:43:17<3:04:01, 10.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1420/2500 [3:43:24<2:48:50, 9.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1421/2500 [3:43:34<2:50:56, 9.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1422/2500 [3:43:45<2:55:21, 9.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1423/2500 [3:43:54<2:51:42, 9.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1424/2500 [3:44:04<2:54:15, 9.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1425/2500 [3:44:13<2:52:09, 9.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1426/2500 [3:44:22<2:47:04, 9.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1427/2500 [3:44:31<2:48:22, 9.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1428/2500 [3:44:41<2:47:10, 9.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1429/2500 [3:44:50<2:49:56, 9.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1430/2500 [3:45:00<2:49:46, 9.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1431/2500 [3:45:12<3:04:49, 10.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1432/2500 [3:45:22<3:01:06, 10.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1433/2500 [3:45:32<2:58:58, 10.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1434/2500 [3:45:42<2:58:37, 10.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", 
"Evaluating workflow: 57%|█████▋ | 1435/2500 [3:45:53<3:03:22, 10.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1436/2500 [3:46:07<3:24:01, 11.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1437/2500 [3:46:21<3:34:25, 12.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1438/2500 [3:46:35<3:46:30, 12.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1439/2500 [3:46:50<3:55:38, 13.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1440/2500 [3:47:03<3:56:51, 13.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1441/2500 [3:47:16<3:56:04, 13.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1442/2500 [3:47:30<3:58:53, 13.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
58%|█████▊ | 1443/2500 [3:47:44<4:01:04, 13.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1444/2500 [3:47:58<4:00:07, 13.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1445/2500 [3:48:11<3:56:30, 13.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1446/2500 [3:48:25<3:57:29, 13.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1447/2500 [3:48:38<3:55:29, 13.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1448/2500 [3:48:51<3:54:18, 13.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1449/2500 [3:49:06<3:59:42, 13.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1450/2500 [3:49:18<3:54:03, 13.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1451/2500 
[3:49:30<3:44:25, 12.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1452/2500 [3:49:42<3:39:56, 12.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1453/2500 [3:49:53<3:34:32, 12.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1454/2500 [3:50:06<3:37:03, 12.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1455/2500 [3:50:18<3:33:15, 12.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1456/2500 [3:50:30<3:33:22, 12.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1457/2500 [3:50:41<3:26:17, 11.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1458/2500 [3:50:54<3:31:19, 12.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1459/2500 [3:51:07<3:36:26, 
12.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1460/2500 [3:51:20<3:38:43, 12.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1461/2500 [3:51:33<3:39:24, 12.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1462/2500 [3:51:45<3:37:30, 12.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▊ | 1463/2500 [3:51:59<3:40:14, 12.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▊ | 1464/2500 [3:52:11<3:38:11, 12.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▊ | 1465/2500 [3:52:23<3:33:17, 12.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▊ | 1466/2500 [3:52:35<3:33:52, 12.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▊ | 1467/2500 [3:52:47<3:31:21, 12.28s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▊ | 1468/2500 [3:53:01<3:38:11, 12.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1469/2500 [3:53:14<3:42:15, 12.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1470/2500 [3:53:27<3:40:55, 12.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1471/2500 [3:53:40<3:42:19, 12.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1472/2500 [3:53:53<3:40:31, 12.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1473/2500 [3:54:05<3:37:58, 12.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1474/2500 [3:54:17<3:35:14, 12.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1475/2500 [3:54:31<3:39:45, 12.86s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1476/2500 [3:54:45<3:44:01, 13.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1477/2500 [3:54:57<3:40:23, 12.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1478/2500 [3:55:09<3:35:27, 12.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1479/2500 [3:55:22<3:34:50, 12.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1480/2500 [3:55:36<3:44:54, 13.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1481/2500 [3:55:49<3:39:38, 12.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1482/2500 [3:56:00<3:29:10, 12.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1483/2500 [3:56:10<3:19:03, 11.74s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1484/2500 [3:56:20<3:11:45, 11.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1485/2500 [3:56:32<3:12:11, 11.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1486/2500 [3:56:46<3:26:53, 12.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1487/2500 [3:56:59<3:29:14, 12.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1488/2500 [3:57:10<3:23:55, 12.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1489/2500 [3:57:21<3:16:39, 11.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1490/2500 [3:57:32<3:13:56, 11.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1491/2500 [3:57:44<3:14:29, 11.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 
'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1492/2500 [3:57:53<3:05:19, 11.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1493/2500 [3:58:04<3:04:13, 10.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1494/2500 [3:58:15<3:04:41, 11.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1495/2500 [3:58:26<3:04:11, 11.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1496/2500 [3:58:39<3:13:37, 11.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1497/2500 [3:58:52<3:16:39, 11.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1498/2500 [3:59:03<3:13:37, 11.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1499/2500 [3:59:15<3:15:23, 11.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1500/2500 [3:59:26<3:13:09, 11.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1501/2500 [3:59:38<3:16:10, 11.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1502/2500 [3:59:50<3:14:45, 11.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1503/2500 [4:00:01<3:14:20, 11.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1504/2500 [4:00:14<3:18:20, 11.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1505/2500 [4:00:26<3:17:03, 11.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1506/2500 [4:00:38<3:20:44, 12.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1507/2500 [4:00:49<3:11:55, 11.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1508/2500 [4:01:00<3:07:40, 11.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1509/2500 [4:01:12<3:15:23, 11.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1510/2500 [4:01:24<3:15:21, 11.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1511/2500 [4:01:37<3:21:41, 12.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1512/2500 [4:01:50<3:22:02, 12.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1513/2500 [4:02:00<3:13:32, 11.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1514/2500 [4:02:12<3:12:21, 11.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1515/2500 [4:02:24<3:12:53, 11.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1516/2500 [4:02:36<3:17:03, 12.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1517/2500 [4:02:47<3:11:10, 11.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1518/2500 [4:02:59<3:12:01, 11.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1519/2500 [4:03:10<3:08:54, 11.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1520/2500 [4:03:23<3:13:02, 11.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1521/2500 [4:03:35<3:16:30, 12.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1522/2500 [4:03:47<3:13:37, 11.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1523/2500 [4:03:58<3:12:05, 11.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", 
"Evaluating workflow: 61%|██████ | 1524/2500 [4:04:12<3:20:31, 12.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1525/2500 [4:04:25<3:25:34, 12.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1526/2500 [4:04:37<3:19:07, 12.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1527/2500 [4:04:48<3:15:40, 12.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1528/2500 [4:05:00<3:12:06, 11.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1529/2500 [4:05:12<3:12:49, 11.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1530/2500 [4:05:25<3:17:09, 12.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1531/2500 [4:05:35<3:07:57, 11.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
61%|██████▏ | 1532/2500 [4:05:45<2:58:43, 11.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████▏ | 1533/2500 [4:05:55<2:56:35, 10.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████▏ | 1534/2500 [4:06:07<2:59:59, 11.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████▏ | 1535/2500 [4:06:18<2:57:43, 11.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████▏ | 1536/2500 [4:06:28<2:54:37, 10.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████▏ | 1537/2500 [4:06:38<2:49:44, 10.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1538/2500 [4:06:49<2:52:40, 10.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1539/2500 [4:06:59<2:48:33, 10.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 
1540/2500 [4:07:10<2:49:53, 10.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1541/2500 [4:07:20<2:45:24, 10.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1542/2500 [4:07:29<2:39:36, 10.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1543/2500 [4:07:39<2:38:43, 9.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1544/2500 [4:07:49<2:38:09, 9.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1545/2500 [4:07:59<2:39:46, 10.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1546/2500 [4:08:09<2:40:21, 10.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1547/2500 [4:08:19<2:40:21, 10.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1548/2500 
[4:08:30<2:41:18, 10.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1549/2500 [4:08:40<2:41:34, 10.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1550/2500 [4:08:51<2:45:26, 10.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1551/2500 [4:09:01<2:41:20, 10.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1552/2500 [4:09:12<2:44:11, 10.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1553/2500 [4:09:22<2:42:33, 10.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1554/2500 [4:09:32<2:44:38, 10.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1555/2500 [4:09:43<2:44:02, 10.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1556/2500 [4:09:53<2:42:17, 
10.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1557/2500 [4:10:03<2:41:49, 10.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1558/2500 [4:10:13<2:41:30, 10.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1559/2500 [4:10:23<2:37:25, 10.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1560/2500 [4:10:33<2:37:14, 10.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1561/2500 [4:10:44<2:40:40, 10.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1562/2500 [4:10:53<2:35:27, 9.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1563/2500 [4:11:03<2:37:47, 10.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1564/2500 [4:11:14<2:41:47, 10.37s/it]" ] }, { 
"name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1565/2500 [4:11:25<2:40:52, 10.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1566/2500 [4:11:35<2:40:27, 10.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1567/2500 [4:11:45<2:41:03, 10.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1568/2500 [4:11:55<2:37:47, 10.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1569/2500 [4:12:06<2:40:43, 10.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1570/2500 [4:12:17<2:46:17, 10.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1571/2500 [4:12:27<2:41:48, 10.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1572/2500 [4:12:37<2:39:21, 10.30s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1573/2500 [4:12:47<2:36:06, 10.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1574/2500 [4:12:58<2:39:21, 10.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1575/2500 [4:13:08<2:39:02, 10.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1576/2500 [4:13:18<2:37:10, 10.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1577/2500 [4:13:31<2:50:46, 11.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1578/2500 [4:13:42<2:51:59, 11.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1579/2500 [4:13:52<2:45:30, 10.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1580/2500 [4:14:03<2:43:29, 10.66s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1581/2500 [4:14:14<2:44:39, 10.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1582/2500 [4:14:23<2:38:19, 10.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1583/2500 [4:14:33<2:38:09, 10.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1584/2500 [4:14:44<2:39:07, 10.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1585/2500 [4:14:54<2:36:12, 10.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1586/2500 [4:15:04<2:34:58, 10.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1587/2500 [4:15:14<2:36:35, 10.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▎ | 1588/2500 [4:15:27<2:45:05, 10.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▎ | 1589/2500 [4:15:36<2:38:57, 10.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▎ | 1590/2500 [4:15:47<2:38:28, 10.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▎ | 1591/2500 [4:15:57<2:37:32, 10.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▎ | 1592/2500 [4:16:06<2:33:31, 10.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▎ | 1593/2500 [4:16:17<2:36:39, 10.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1594/2500 [4:16:29<2:41:18, 10.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1595/2500 [4:16:39<2:39:54, 10.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1596/2500 [4:16:49<2:37:15, 10.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 
'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1597/2500 [4:16:59<2:35:50, 10.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1598/2500 [4:17:10<2:35:19, 10.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1599/2500 [4:17:19<2:30:24, 10.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1600/2500 [4:17:29<2:32:11, 10.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1601/2500 [4:17:40<2:33:02, 10.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1602/2500 [4:17:51<2:39:04, 10.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1603/2500 [4:18:02<2:40:20, 10.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1604/2500 [4:18:12<2:37:34, 10.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1605/2500 [4:18:23<2:37:34, 10.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1606/2500 [4:18:33<2:34:52, 10.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1607/2500 [4:18:43<2:34:40, 10.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1608/2500 [4:18:53<2:31:31, 10.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1609/2500 [4:19:03<2:31:39, 10.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1610/2500 [4:19:14<2:31:47, 10.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1611/2500 [4:19:23<2:28:51, 10.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1612/2500 [4:19:33<2:25:23, 9.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": 
"stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1613/2500 [4:19:41<2:20:37, 9.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1614/2500 [4:19:52<2:23:56, 9.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1615/2500 [4:20:01<2:20:02, 9.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1616/2500 [4:20:09<2:15:08, 9.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1617/2500 [4:20:18<2:13:09, 9.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1618/2500 [4:20:27<2:13:39, 9.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1619/2500 [4:20:36<2:13:43, 9.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1620/2500 [4:20:47<2:23:00, 9.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1621/2500 [4:20:56<2:18:39, 9.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1622/2500 [4:21:05<2:16:31, 9.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1623/2500 [4:21:14<2:13:51, 9.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1624/2500 [4:21:24<2:17:07, 9.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1625/2500 [4:21:32<2:11:59, 9.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1626/2500 [4:21:42<2:14:44, 9.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1627/2500 [4:21:51<2:13:42, 9.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1628/2500 [4:22:00<2:12:42, 9.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", 
"Evaluating workflow: 65%|██████▌ | 1629/2500 [4:22:09<2:13:27, 9.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1630/2500 [4:22:19<2:15:24, 9.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1631/2500 [4:22:28<2:14:21, 9.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1632/2500 [4:22:37<2:11:59, 9.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1633/2500 [4:22:48<2:20:24, 9.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1634/2500 [4:22:56<2:14:39, 9.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1635/2500 [4:23:06<2:17:01, 9.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1636/2500 [4:23:15<2:14:37, 9.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
65%|██████▌ | 1637/2500 [4:23:25<2:18:22, 9.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1638/2500 [4:23:36<2:21:59, 9.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1639/2500 [4:23:45<2:19:14, 9.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1640/2500 [4:23:55<2:19:53, 9.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1641/2500 [4:24:04<2:15:29, 9.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1642/2500 [4:24:13<2:13:35, 9.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1643/2500 [4:24:23<2:16:59, 9.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1644/2500 [4:24:33<2:17:38, 9.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1645/2500 
[4:24:43<2:19:03, 9.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1646/2500 [4:24:52<2:18:00, 9.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1647/2500 [4:25:01<2:14:26, 9.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1648/2500 [4:25:10<2:11:44, 9.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1649/2500 [4:25:20<2:13:29, 9.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1650/2500 [4:25:29<2:11:27, 9.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1651/2500 [4:25:39<2:12:58, 9.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1652/2500 [4:25:52<2:29:27, 10.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1653/2500 [4:26:02<2:26:28, 
10.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1654/2500 [4:26:13<2:31:40, 10.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1655/2500 [4:26:25<2:35:35, 11.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1656/2500 [4:26:35<2:28:24, 10.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▋ | 1657/2500 [4:26:44<2:23:00, 10.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▋ | 1658/2500 [4:26:53<2:19:19, 9.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▋ | 1659/2500 [4:27:02<2:15:38, 9.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▋ | 1660/2500 [4:27:12<2:14:45, 9.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▋ | 1661/2500 [4:27:22<2:15:16, 9.67s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▋ | 1662/2500 [4:27:31<2:13:50, 9.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1663/2500 [4:27:40<2:11:11, 9.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1664/2500 [4:27:51<2:17:25, 9.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1665/2500 [4:28:01<2:16:21, 9.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1666/2500 [4:28:09<2:11:27, 9.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1667/2500 [4:28:20<2:16:58, 9.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1668/2500 [4:28:30<2:15:17, 9.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1669/2500 [4:28:40<2:16:30, 9.86s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1670/2500 [4:28:49<2:15:06, 9.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1671/2500 [4:29:00<2:19:29, 10.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1672/2500 [4:29:10<2:18:09, 10.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1673/2500 [4:29:19<2:15:20, 9.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1674/2500 [4:29:28<2:11:22, 9.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1675/2500 [4:29:38<2:10:57, 9.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1676/2500 [4:29:47<2:11:31, 9.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1677/2500 [4:29:57<2:12:47, 9.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1678/2500 [4:30:07<2:11:14, 9.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1679/2500 [4:30:17<2:13:10, 9.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1680/2500 [4:30:27<2:14:30, 9.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1681/2500 [4:30:36<2:11:52, 9.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1682/2500 [4:30:46<2:12:31, 9.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1683/2500 [4:30:55<2:09:23, 9.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1684/2500 [4:31:04<2:08:08, 9.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1685/2500 [4:31:14<2:09:20, 9.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 
1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1686/2500 [4:31:23<2:06:22, 9.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1687/2500 [4:31:32<2:05:54, 9.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1688/2500 [4:31:41<2:03:12, 9.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1689/2500 [4:31:50<2:04:28, 9.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1690/2500 [4:32:00<2:07:00, 9.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1691/2500 [4:32:10<2:11:38, 9.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1692/2500 [4:32:20<2:11:11, 9.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1693/2500 [4:32:30<2:11:18, 9.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1694/2500 [4:32:40<2:11:47, 9.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1695/2500 [4:32:49<2:08:56, 9.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1696/2500 [4:32:58<2:06:21, 9.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1697/2500 [4:33:07<2:03:51, 9.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1698/2500 [4:33:17<2:05:06, 9.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1699/2500 [4:33:26<2:06:13, 9.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1700/2500 [4:33:35<2:04:44, 9.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1701/2500 [4:33:44<2:03:06, 9.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1702/2500 [4:33:54<2:03:33, 9.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1703/2500 [4:34:04<2:05:55, 9.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1704/2500 [4:34:13<2:04:01, 9.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1705/2500 [4:34:22<2:03:28, 9.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1706/2500 [4:34:32<2:05:48, 9.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1707/2500 [4:34:41<2:04:36, 9.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1708/2500 [4:34:50<2:02:48, 9.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1709/2500 [4:35:00<2:03:55, 9.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1710/2500 [4:35:10<2:06:14, 9.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1711/2500 [4:35:19<2:03:48, 9.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 1712/2500 [4:35:29<2:06:56, 9.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▊ | 1713/2500 [4:35:38<2:05:43, 9.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▊ | 1714/2500 [4:35:48<2:04:41, 9.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▊ | 1715/2500 [4:35:58<2:07:25, 9.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▊ | 1716/2500 [4:36:07<2:03:55, 9.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▊ | 1717/2500 [4:36:16<2:03:11, 9.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating 
workflow: 69%|██████▊ | 1718/2500 [4:36:25<2:01:18, 9.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 1719/2500 [4:36:35<2:03:27, 9.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 1720/2500 [4:36:44<2:01:27, 9.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 1721/2500 [4:36:53<1:57:46, 9.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 1722/2500 [4:37:03<2:01:37, 9.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 1723/2500 [4:37:14<2:07:06, 9.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 1724/2500 [4:37:23<2:04:01, 9.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 1725/2500 [4:37:30<1:56:46, 9.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 
1726/2500 [4:37:39<1:56:32, 9.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976055.858116619)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976076.440877778)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976084.457627666)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976065.831204742)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976105.402377371)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976114.24426735)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976124.684092667)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976094.69229917)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976141.672026026)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976133.062042002)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 976150.435319295)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 976158.803218274)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed 
connector\n", "connections: ['deque([(, 976167.097773914)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976175.212621203)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976182.873361555)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976190.840787171)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976200.297465613)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976208.759175995)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 976217.209215341)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976246.470565585)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976256.167127695)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976235.924260399)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976266.302071689)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976274.632728183)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 976283.694570207)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976301.978071039)])']\n", "connector: \n", "Unclosed client session\n", 
"client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976311.049834589)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976318.824085041)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 976327.777967995)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976336.894746263)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976345.330723305)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976362.838932163)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976353.817776233)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976371.577166146)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976390.330827177)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976381.278998515)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 976399.610787093)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976408.854084015)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976417.295921668)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: 
['deque([(, 976427.017516087)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976436.023174363)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976444.558497963)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976453.251035731)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976472.325312652)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976482.159958961)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976491.342985475)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976506.616031692)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976515.844720818)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976524.250596034)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976534.057938251)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976553.307198425)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976574.112039367)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976544.400963156)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed 
connector\n", "connections: ['deque([(, 976563.131112577)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976585.110006218)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976595.164223435)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 976604.412303942)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976613.041636612)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976621.560281995)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976630.343399137)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 976640.443567858)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 976650.552400212)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976660.907010465)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976670.992116669)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976680.256463663)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976700.826019658)])']\n", "connector: \n", "Unclosed connector\n", 
"connections: ['deque([(, 976711.50371363)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 976720.3462082)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976740.012018835)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976749.547861785)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 976760.548570931)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976777.315595495)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976786.860880839)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976797.705052133)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976768.844753984)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976807.675384921)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976826.877804244)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976817.160163053)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 976835.750219113)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976845.754419529)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
976855.355160061)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976875.183313473)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 976864.889795797)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976900.839166749)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976883.222470602)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 976892.388469353)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 976908.300912007)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976917.592717173)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976925.438599719)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976934.436738987)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976945.368057232)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976963.597632923)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976972.197727147)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976955.829923058)])']\n", "connector: \n", "Unclosed client 
session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977006.579708643)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976979.711168472)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976987.939173088)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 976997.490053558)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977019.150345502)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977028.45581933)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977037.448968341)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977063.668611083)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977054.607402446)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977073.547619168)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977090.862456916)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977098.733495487)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977107.086378079)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977081.695785164)])']\n", "connector: \n", 
"Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977136.749347719)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977116.902389397)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977127.559979595)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 977145.074455055)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 977162.576128927)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977153.711122419)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 977171.514138627)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977196.615821556)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977188.106824083)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 977204.717645566)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977221.956826798)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977229.844248856)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977238.503992023)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed 
connector\n", "connections: ['deque([(, 977260.130237679)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977268.104508021)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977277.443321732)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977297.18194257)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977306.741323036)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977288.080484065)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977329.072592246)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977350.483240152)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977341.422212296)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977389.174455078)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977361.039601548)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977369.315248504)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 977380.513957115)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977409.444621485)])']\n", "connector: \n", "Unclosed client session\n", "client_session: 
\n", "Unclosed connector\n", "connections: ['deque([(, 977431.489184177)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977400.408601803)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 977420.210769106)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977450.705968454)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977467.371544123)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977441.910109914)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 977459.208756829)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977492.213406188)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977475.772763117)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 977484.06379969)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977511.717247121)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977519.650059561)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977528.813060466)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
977537.384664262)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977546.604009179)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977555.127832489)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977563.058680712)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977571.217372663)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977579.423247126)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977587.614741288)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 977596.095839049)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977604.271163456)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977623.008760171)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977644.114393547)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977612.56735118)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977653.197124616)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 977661.559405383)])']\n", "connector: \n", "Unclosed connector\n", 
"connections: ['deque([(, 977669.634676914)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977679.196394477)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977688.534659104)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977697.086278194)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977705.220375889)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977731.91891809)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977714.58858527)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 977723.019562104)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977741.113388926)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977760.397061034)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977751.058503431)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 977769.713375203)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977778.285396071)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977803.921347275)])']\n", "connector: \n", 
"Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977786.144467951)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 977794.339352988)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977812.372397966)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977821.095122322)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 977829.183962418)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977855.706994131)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977847.381297179)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 977866.333926104)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 977875.461645561)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977884.521761264)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977901.786278629)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977893.639905207)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977246.326473179)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client 
session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 976226.277830275)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 976292.631864357)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 976463.443362121)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 976690.205784251)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 977045.863738465)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 977180.824906067)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 977213.296185416)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 977317.739477798)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 977502.672404311)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 977837.438104901)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 977912.680231907)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977921.274197331)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977930.444302656)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977939.187102274)])']\n", "connector: \n", "Unclosed client session\n", 
"client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977947.796354988)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977956.105868363)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977974.061064516)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977964.846634944)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977983.453410224)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 977997.086610743)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978009.034566544)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978018.047075265)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978027.411277593)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978045.711409497)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978054.232907415)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978036.461054663)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978075.132856313)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978083.857450201)])']\n", "connector: \n", "Unclosed 
client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978092.669562606)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978065.004532515)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978112.72804697)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978103.099605528)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 978121.821480279)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978130.667076806)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978142.750181776)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978154.197708497)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978161.652065951)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978180.825092238)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978190.110649862)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978172.380839965)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978199.90381295)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", 
"connections: ['deque([(, 978208.344197593)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978217.383470699)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978226.330677602)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978236.231654129)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978244.382076198)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978253.899421564)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978263.462678046)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978272.20189331)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 978280.54956206)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 978289.873521035)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 978298.642039224)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978307.314409334)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978316.05050338)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978324.791332546)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed 
connector\n", "connections: ['deque([(, 978332.603738182)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978360.757424986)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978341.5469935)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 978352.043900001)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978370.265544136)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978388.401743128)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978379.486459778)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 978395.533581873)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978413.61833023)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978422.108397732)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978404.384146298)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 978434.576873237)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978441.864813666)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", 
"connections: ['deque([(, 978450.111656897)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 978459.503280491)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978468.892893163)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978486.432226763)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978497.878839529)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978477.176634594)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978506.692028038)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978515.335439762)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978523.247248482)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 978531.111730082)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978557.113017041)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978565.796598888)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978539.554982976)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 978547.724628446)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
978583.534720483)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978600.255364333)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 978574.91510049)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 978592.070340337)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978609.724916036)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978627.686806297)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978618.82354596)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 978638.48256819)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978648.779558713)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978667.109720888)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978657.335014938)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978677.084151445)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978687.331721773)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978697.023293816)])']\n", "connector: \n", "Unclosed client session\n", 
"client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978705.959690708)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978723.70831493)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978733.813080076)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978714.320113809)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978742.843113173)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978751.178721101)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978759.413288399)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978769.6835643)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978778.555117295)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978804.47593548)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978813.147257334)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978787.452893238)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 978796.513405772)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978822.74837038)])']\n", "connector: \n", "Unclosed client 
session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978832.891439822)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978841.453738133)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978851.676181717)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978861.334411345)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978879.696490346)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978871.561246947)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 978891.608299604)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978899.906926052)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978920.117012668)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978910.297751714)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978956.486583489)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978928.988486366)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 978938.480897403)])']\n", "connector: \n", "Unclosed connector\n", 
"connections: ['deque([(, 978946.699181463)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978976.892151029)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 978966.992097789)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 978985.78794346)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 978995.886200194)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979021.636978028)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979031.465862513)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979004.44398677)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 979012.691647943)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979048.797501368)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979039.575108588)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 979057.158826226)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 979072.234702846)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979082.063524769)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed 
connector\n", "connections: ['deque([(, 979099.33642958)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979090.068202436)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979108.938568103)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979117.692463953)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979126.737605695)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 979136.095437084)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979145.174032195)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979154.640074259)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979163.533757661)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 979172.540010056)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979191.618404156)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979182.322643738)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 979203.948241211)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 979212.742068661)])']\n", 
"connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979231.713781027)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979241.650727229)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979222.623673472)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 979251.811569982)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979261.101989522)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979280.339193589)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979271.157011811)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979290.208693229)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979298.567552789)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979316.942069302)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979308.527957579)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979326.47215005)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979344.805576187)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
979353.9547201)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979335.292339893)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979363.037982602)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979372.878085393)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979382.305843261)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 979389.817103608)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979409.223131676)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979400.262571266)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 979419.103929585)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 979427.464382528)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979446.211435397)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979458.846714597)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979467.859670369)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 979437.324969957)])']\n", "connector: \n", "Unclosed client 
session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979476.348047924)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979485.83913152)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 979495.728975953)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 979505.404506232)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979525.218776123)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979534.783689218)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979516.238870899)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979546.055849108)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979554.529081601)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979564.662486631)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 979574.976801189)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979593.238865742)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979601.907168348)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 979610.282686479)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", 
"Unclosed connector\n", "connections: ['deque([(, 979584.650457459)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979631.575198736)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979640.316715065)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979620.52367236)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979669.027729669)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979677.295434058)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979649.360029948)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 979658.839395165)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979708.784297351)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979718.263627054)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979685.587747047)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 979696.762390152)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 979736.938170695)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client 
session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979727.259545106)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 979746.932344451)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 979755.196438199)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979764.549673572)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979773.42711471)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 979784.163425965)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979793.175464143)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979812.783394068)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979802.573816512)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 979824.571197376)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979844.176875532)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979853.501635705)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979864.384641816)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979833.979501256)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
979874.13058163)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979903.504521324)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979883.254204662)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 979893.178862025)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979913.545666131)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979936.035943428)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979946.166341918)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979925.67707971)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979969.190643507)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979977.777461091)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979988.302860047)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979959.004997365)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980008.048367875)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 980027.075922907)])']\n", "connector: \n", "Unclosed client 
session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 979997.674868615)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 980017.064632999)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980037.262091416)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980049.947313806)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980071.504744057)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980059.224499888)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980081.661608712)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980093.682376158)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980102.992060965)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980112.894301227)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980124.802523668)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980134.927988902)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 980146.650479706)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980159.027106098)])']\n", "connector: \n", 
"Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980180.949858713)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980169.822419576)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 980193.98405631)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980215.507586676)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980225.653938984)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980238.569582724)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980205.63511347)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980248.514457283)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980257.407959167)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980267.239901645)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980274.674079256)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980284.472128356)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980294.825110868)])']\n", "connector: \n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 980303.939653565)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 980314.007077939)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980332.057268331)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980350.881872696)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980323.36445384)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 980341.661624508)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980360.784947191)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980382.668780146)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980370.304837784)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 980392.37919852)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980402.185313526)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980423.192472258)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980412.214486758)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980437.437813572)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", 
"Unclosed connector\n", "connections: ['deque([(, 980450.934844298)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980465.35223873)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 980479.910528893)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980493.509484908)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980520.760706887)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980534.765281105)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980506.811142793)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980548.310666839)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980574.993185006)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980561.312902308)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 980588.173615674)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980615.842509946)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980601.410408839)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 980628.494420727)])']\n", "connector: \n", "Unclosed client session\n", 
"client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980640.076262288)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980652.097472606)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980663.699695315)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980676.51426715)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980724.39967026)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 980688.275650777)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980700.58407178)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 980711.528410572)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980737.589598794)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980750.542473612)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980775.678748725)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980763.335060499)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980788.82075792)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", 
"Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980801.209006485)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 980812.93864228)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 980825.455106248)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980851.060506003)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980837.420385419)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980864.5754118)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 980877.293270402)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980890.475530148)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980903.131095263)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 980915.54654954)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980927.789786368)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980941.299845964)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980967.499252732)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980955.038489471)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: 
['deque([(, 981006.70938165)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981018.951244585)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 980979.499528901)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 980992.069543974)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981040.248981559)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981029.870416675)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 981050.593819045)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 981062.041815506)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981076.338209982)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981089.084040919)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981100.468421326)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981111.160063248)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981122.333738876)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981134.001648863)])']\n", "connector: \n", "Unclosed 
client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981143.786414213)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981176.695260046)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981189.608047349)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981154.635589208)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 981165.742951825)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981201.82087886)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981213.018400542)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981225.003835045)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981236.307364734)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981248.541091747)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981260.078415871)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981271.741725281)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981284.280297491)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981308.675771412)])']\n", "connector: 
\n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981319.058416944)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981296.009148999)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981329.837542177)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981342.784180546)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981354.646057287)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981367.806492259)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981380.153593799)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981414.160480876)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 981390.743654849)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981402.308784505)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981426.796988865)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981437.656026103)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981449.539243284)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed 
connector\n", "connections: ['deque([(, 981485.681171754)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981460.676439275)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 981473.111670519)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 981497.1735813)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981508.781640045)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981535.752342788)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981547.119702852)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981522.34523184)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981558.718743299)])']\n", "connector: \n", "Evaluating workflow: 69%|██████▉ | 1727/2500 [4:37:52<2:08:23, 9.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 1728/2500 [4:38:01<2:07:21, 9.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 1729/2500 [4:38:10<2:01:32, 9.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
69%|██████▉ | 1730/2500 [4:38:19<2:02:23, 9.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 1731/2500 [4:38:30<2:04:20, 9.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 1732/2500 [4:38:39<2:04:22, 9.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 1733/2500 [4:38:50<2:09:51, 10.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 1734/2500 [4:39:00<2:07:24, 9.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 1735/2500 [4:39:10<2:06:22, 9.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 1736/2500 [4:39:20<2:09:19, 10.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 1737/2500 [4:39:30<2:07:16, 10.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 1738/2500 
[4:39:41<2:09:15, 10.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 1739/2500 [4:39:50<2:06:57, 10.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 1740/2500 [4:40:04<2:20:40, 11.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 1741/2500 [4:40:13<2:13:39, 10.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 1742/2500 [4:40:22<2:05:23, 9.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 1743/2500 [4:40:32<2:04:44, 9.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 1744/2500 [4:40:42<2:05:03, 9.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 1745/2500 [4:40:52<2:07:31, 10.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 1746/2500 [4:41:03<2:08:46, 
10.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 1747/2500 [4:41:12<2:06:17, 10.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 1748/2500 [4:41:23<2:09:51, 10.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 1749/2500 [4:41:33<2:05:52, 10.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 1750/2500 [4:41:43<2:06:07, 10.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 1751/2500 [4:41:53<2:04:25, 9.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 1752/2500 [4:42:04<2:11:14, 10.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 1753/2500 [4:42:14<2:06:13, 10.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 1754/2500 [4:42:23<2:02:14, 9.83s/it]" ] }, { 
"name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 1755/2500 [4:42:32<2:00:03, 9.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 1756/2500 [4:42:42<2:00:20, 9.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 1757/2500 [4:42:51<1:59:37, 9.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 1758/2500 [4:43:02<2:03:35, 9.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 1759/2500 [4:43:11<1:57:36, 9.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 1760/2500 [4:43:21<1:59:19, 9.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 1761/2500 [4:43:29<1:55:39, 9.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 1762/2500 [4:43:39<1:58:00, 9.59s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1763/2500 [4:43:49<1:59:10, 9.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1764/2500 [4:44:00<2:02:35, 9.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1765/2500 [4:44:09<1:57:32, 9.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1766/2500 [4:44:19<1:59:46, 9.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1767/2500 [4:44:28<1:58:13, 9.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1768/2500 [4:44:38<1:57:13, 9.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1769/2500 [4:44:47<1:55:25, 9.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1770/2500 [4:44:56<1:55:19, 9.48s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1771/2500 [4:45:05<1:52:26, 9.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1772/2500 [4:45:18<2:03:37, 10.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1773/2500 [4:45:28<2:03:50, 10.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1774/2500 [4:45:36<1:54:31, 9.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1775/2500 [4:45:45<1:53:07, 9.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1776/2500 [4:45:54<1:53:10, 9.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1777/2500 [4:46:03<1:52:57, 9.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1778/2500 [4:46:13<1:54:02, 9.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 
0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1779/2500 [4:46:22<1:51:35, 9.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1780/2500 [4:46:31<1:49:39, 9.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 1781/2500 [4:46:40<1:51:10, 9.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████▏ | 1782/2500 [4:46:49<1:49:59, 9.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████▏ | 1783/2500 [4:46:58<1:49:10, 9.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████▏ | 1784/2500 [4:47:07<1:46:19, 8.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████▏ | 1785/2500 [4:47:16<1:45:54, 8.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████▏ | 1786/2500 [4:47:26<1:50:27, 9.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████▏ | 1787/2500 [4:47:35<1:49:19, 9.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1788/2500 [4:47:44<1:47:27, 9.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1789/2500 [4:47:52<1:45:18, 8.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1790/2500 [4:48:07<2:06:57, 10.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1791/2500 [4:48:17<2:05:05, 10.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1792/2500 [4:48:27<2:01:56, 10.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1793/2500 [4:48:39<2:08:42, 10.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1794/2500 [4:48:49<2:02:20, 10.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, 
{ "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1795/2500 [4:49:00<2:05:34, 10.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1796/2500 [4:49:10<2:01:52, 10.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1797/2500 [4:49:20<2:00:54, 10.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1798/2500 [4:49:29<1:58:09, 10.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1799/2500 [4:49:38<1:54:39, 9.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1800/2500 [4:49:47<1:50:27, 9.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1801/2500 [4:49:56<1:46:25, 9.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1802/2500 [4:50:06<1:50:36, 9.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": 
"stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1803/2500 [4:50:16<1:51:24, 9.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1804/2500 [4:50:25<1:51:57, 9.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1805/2500 [4:50:34<1:49:35, 9.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1806/2500 [4:50:44<1:48:52, 9.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1807/2500 [4:50:53<1:49:06, 9.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1808/2500 [4:51:02<1:47:25, 9.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1809/2500 [4:51:11<1:43:34, 8.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1810/2500 [4:51:20<1:44:17, 9.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1811/2500 [4:51:30<1:47:15, 9.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 1812/2500 [4:51:41<1:52:16, 9.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1813/2500 [4:51:52<1:56:12, 10.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1814/2500 [4:52:00<1:50:23, 9.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1815/2500 [4:52:10<1:50:19, 9.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1816/2500 [4:52:19<1:48:26, 9.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1817/2500 [4:52:28<1:47:05, 9.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1818/2500 [4:52:37<1:44:34, 9.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1819/2500 [4:52:46<1:44:24, 9.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1820/2500 [4:52:56<1:46:56, 9.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1821/2500 [4:53:05<1:46:53, 9.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1822/2500 [4:53:15<1:48:30, 9.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1823/2500 [4:53:25<1:46:51, 9.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1824/2500 [4:53:36<1:52:06, 9.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1825/2500 [4:53:45<1:50:16, 9.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1826/2500 [4:53:56<1:54:56, 10.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 73%|███████▎ | 1827/2500 [4:54:07<1:55:51, 10.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1828/2500 [4:54:23<2:14:07, 11.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1829/2500 [4:54:33<2:07:11, 11.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1830/2500 [4:54:44<2:05:49, 11.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1831/2500 [4:54:54<2:02:24, 10.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1832/2500 [4:55:04<1:58:02, 10.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1833/2500 [4:55:13<1:54:07, 10.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1834/2500 [4:55:24<1:54:29, 10.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", 
"Evaluating workflow: 73%|███████▎ | 1835/2500 [4:55:34<1:55:51, 10.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1836/2500 [4:55:44<1:52:22, 10.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 1837/2500 [4:55:53<1:50:16, 9.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▎ | 1838/2500 [4:56:03<1:48:24, 9.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▎ | 1839/2500 [4:56:12<1:46:57, 9.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▎ | 1840/2500 [4:56:22<1:47:55, 9.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▎ | 1841/2500 [4:56:31<1:44:45, 9.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▎ | 1842/2500 [4:56:40<1:41:58, 9.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
74%|███████▎ | 1843/2500 [4:56:49<1:39:40, 9.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1844/2500 [4:56:59<1:43:19, 9.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1845/2500 [4:57:08<1:42:01, 9.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1846/2500 [4:57:17<1:41:22, 9.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1847/2500 [4:57:27<1:42:01, 9.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1848/2500 [4:57:37<1:44:51, 9.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1849/2500 [4:57:47<1:45:53, 9.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1850/2500 [4:57:56<1:43:11, 9.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 
1851/2500 [4:58:05<1:39:40, 9.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1852/2500 [4:58:18<1:53:40, 10.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1853/2500 [4:58:29<1:53:08, 10.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1854/2500 [4:58:38<1:50:18, 10.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1855/2500 [4:58:48<1:47:05, 9.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1856/2500 [4:58:57<1:44:59, 9.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1857/2500 [4:59:06<1:43:22, 9.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1858/2500 [4:59:15<1:41:08, 9.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1859/2500 
[4:59:24<1:39:23, 9.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1860/2500 [4:59:33<1:38:48, 9.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1861/2500 [4:59:43<1:40:10, 9.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 1862/2500 [4:59:52<1:38:09, 9.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 1863/2500 [5:00:01<1:39:00, 9.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 1864/2500 [5:00:12<1:42:59, 9.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 1865/2500 [5:00:22<1:44:02, 9.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 1866/2500 [5:00:33<1:45:43, 10.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 1867/2500 [5:00:43<1:45:35, 
10.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 1868/2500 [5:00:52<1:44:35, 9.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 1869/2500 [5:01:02<1:44:13, 9.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 1870/2500 [5:01:12<1:43:03, 9.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 1871/2500 [5:01:21<1:42:09, 9.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 1872/2500 [5:01:31<1:40:56, 9.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 1873/2500 [5:01:40<1:39:31, 9.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 1874/2500 [5:01:49<1:38:35, 9.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 1875/2500 [5:02:00<1:42:52, 9.88s/it]" ] }, 
{ "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 1876/2500 [5:02:10<1:42:03, 9.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 1877/2500 [5:02:19<1:38:51, 9.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 1878/2500 [5:02:28<1:36:50, 9.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 1879/2500 [5:02:36<1:34:46, 9.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 1880/2500 [5:02:46<1:36:00, 9.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 1881/2500 [5:02:56<1:37:39, 9.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 1882/2500 [5:03:07<1:42:24, 9.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 1883/2500 [5:03:17<1:43:02, 10.02s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 1884/2500 [5:03:26<1:40:14, 9.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 1885/2500 [5:03:37<1:41:42, 9.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 1886/2500 [5:03:46<1:41:02, 9.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 1887/2500 [5:03:56<1:40:34, 9.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 1888/2500 [5:04:07<1:42:21, 10.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 1889/2500 [5:04:16<1:40:27, 9.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 1890/2500 [5:04:26<1:39:31, 9.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 1891/2500 [5:04:36<1:39:36, 9.81s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 1892/2500 [5:04:46<1:41:37, 10.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 1893/2500 [5:04:58<1:46:33, 10.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 1894/2500 [5:05:09<1:47:09, 10.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 1895/2500 [5:05:19<1:46:42, 10.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 1896/2500 [5:05:29<1:45:02, 10.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 1897/2500 [5:05:40<1:45:18, 10.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 1898/2500 [5:05:51<1:48:00, 10.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 1899/2500 [5:06:01<1:45:33, 10.54s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 1900/2500 [5:06:12<1:45:04, 10.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 1901/2500 [5:06:22<1:43:24, 10.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 1902/2500 [5:06:31<1:41:39, 10.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 1903/2500 [5:06:42<1:42:30, 10.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 1904/2500 [5:06:52<1:40:56, 10.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 1905/2500 [5:07:02<1:40:31, 10.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 1906/2500 [5:07:13<1:42:31, 10.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▋ | 1907/2500 [5:07:23<1:42:53, 10.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▋ | 1908/2500 [5:07:33<1:40:05, 10.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▋ | 1909/2500 [5:07:43<1:38:42, 10.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▋ | 1910/2500 [5:07:52<1:37:24, 9.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▋ | 1911/2500 [5:08:02<1:36:01, 9.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▋ | 1912/2500 [5:08:12<1:37:15, 9.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1913/2500 [5:08:21<1:34:27, 9.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1914/2500 [5:08:31<1:35:46, 9.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1915/2500 [5:08:41<1:36:33, 9.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 
'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1916/2500 [5:08:51<1:36:54, 9.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1917/2500 [5:09:03<1:42:02, 10.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1918/2500 [5:09:13<1:39:36, 10.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1919/2500 [5:09:23<1:39:23, 10.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1920/2500 [5:09:34<1:39:31, 10.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1921/2500 [5:09:44<1:39:42, 10.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1922/2500 [5:09:54<1:37:49, 10.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1923/2500 [5:10:05<1:42:10, 10.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1924/2500 [5:10:16<1:42:10, 10.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1925/2500 [5:10:25<1:38:04, 10.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1926/2500 [5:10:36<1:38:21, 10.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1927/2500 [5:10:45<1:34:54, 9.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1928/2500 [5:10:54<1:33:28, 9.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1929/2500 [5:11:04<1:32:34, 9.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1930/2500 [5:11:15<1:36:13, 10.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1931/2500 [5:11:26<1:37:33, 10.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1932/2500 [5:11:36<1:36:55, 10.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1933/2500 [5:11:47<1:38:45, 10.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1934/2500 [5:11:57<1:38:01, 10.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1935/2500 [5:12:10<1:44:56, 11.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1936/2500 [5:12:20<1:42:16, 10.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 1937/2500 [5:12:30<1:39:32, 10.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1938/2500 [5:12:39<1:35:47, 10.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1939/2500 [5:12:49<1:35:06, 10.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": 
"stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1940/2500 [5:12:59<1:32:11, 9.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1941/2500 [5:13:09<1:32:24, 9.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1942/2500 [5:13:19<1:33:11, 10.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1943/2500 [5:13:29<1:32:26, 9.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1944/2500 [5:13:39<1:31:49, 9.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1945/2500 [5:13:49<1:32:11, 9.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1946/2500 [5:13:58<1:31:21, 9.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1947/2500 [5:14:08<1:31:11, 9.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1948/2500 [5:14:19<1:32:36, 10.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1949/2500 [5:14:28<1:31:23, 9.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1950/2500 [5:14:38<1:29:44, 9.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1951/2500 [5:14:51<1:39:19, 10.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1952/2500 [5:15:01<1:35:36, 10.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1953/2500 [5:15:11<1:34:49, 10.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1954/2500 [5:15:22<1:36:37, 10.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1955/2500 [5:15:32<1:34:47, 10.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1956/2500 [5:15:42<1:33:43, 10.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1957/2500 [5:15:52<1:31:10, 10.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1958/2500 [5:16:03<1:33:26, 10.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1959/2500 [5:16:14<1:37:06, 10.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1960/2500 [5:16:24<1:32:47, 10.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1961/2500 [5:16:34<1:33:31, 10.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 1962/2500 [5:16:44<1:31:04, 10.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▊ | 1963/2500 [5:16:54<1:29:43, 10.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 79%|███████▊ | 1964/2500 [5:17:03<1:28:11, 9.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▊ | 1965/2500 [5:17:12<1:25:54, 9.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▊ | 1966/2500 [5:17:23<1:28:04, 9.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▊ | 1967/2500 [5:17:33<1:29:41, 10.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▊ | 1968/2500 [5:17:43<1:29:35, 10.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 1969/2500 [5:17:54<1:30:15, 10.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 1970/2500 [5:18:06<1:35:44, 10.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 1971/2500 [5:18:17<1:34:20, 10.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", 
"Evaluating workflow: 79%|███████▉ | 1972/2500 [5:18:26<1:31:15, 10.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 1973/2500 [5:18:35<1:27:11, 9.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 1974/2500 [5:18:45<1:26:22, 9.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 1975/2500 [5:18:55<1:28:00, 10.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 1976/2500 [5:19:05<1:26:31, 9.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 1977/2500 [5:19:14<1:25:27, 9.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 1978/2500 [5:19:26<1:30:01, 10.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 1979/2500 [5:19:36<1:27:59, 10.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating 
workflow: 79%|███████▉ | 1980/2500 [5:19:46<1:27:37, 10.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 1981/2500 [5:19:56<1:27:36, 10.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 1982/2500 [5:20:15<1:50:53, 12.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 1983/2500 [5:20:27<1:47:24, 12.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 1984/2500 [5:20:36<1:40:36, 11.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 1985/2500 [5:20:46<1:35:47, 11.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 1986/2500 [5:20:56<1:30:52, 10.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 1987/2500 [5:21:05<1:28:30, 10.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
80%|███████▉ | 1988/2500 [5:21:15<1:26:01, 10.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 1989/2500 [5:21:27<1:30:26, 10.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 1990/2500 [5:21:37<1:29:01, 10.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 1991/2500 [5:21:47<1:29:04, 10.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 1992/2500 [5:21:57<1:27:26, 10.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 1993/2500 [5:22:07<1:26:17, 10.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 1994/2500 [5:22:19<1:29:00, 10.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 1995/2500 [5:22:29<1:27:31, 10.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ 
| 1996/2500 [5:22:43<1:37:00, 11.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 1997/2500 [5:22:54<1:35:29, 11.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 1998/2500 [5:23:04<1:31:35, 10.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 1999/2500 [5:23:14<1:30:00, 10.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2000/2500 [5:23:25<1:29:21, 10.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2001/2500 [5:23:35<1:27:45, 10.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2002/2500 [5:23:44<1:23:53, 10.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2003/2500 [5:23:55<1:25:35, 10.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2004/2500 
[5:24:04<1:22:56, 10.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2005/2500 [5:24:14<1:21:01, 9.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2006/2500 [5:24:23<1:19:44, 9.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2007/2500 [5:24:34<1:21:41, 9.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2008/2500 [5:24:43<1:19:41, 9.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2009/2500 [5:24:52<1:18:59, 9.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2010/2500 [5:25:03<1:20:24, 9.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2011/2500 [5:25:12<1:19:46, 9.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2012/2500 [5:25:22<1:20:09, 
9.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2013/2500 [5:25:32<1:20:00, 9.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2014/2500 [5:25:41<1:18:25, 9.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2015/2500 [5:25:51<1:18:04, 9.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2016/2500 [5:26:03<1:23:24, 10.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2017/2500 [5:26:13<1:22:30, 10.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2018/2500 [5:26:23<1:22:28, 10.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2019/2500 [5:26:33<1:21:41, 10.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2020/2500 [5:26:44<1:22:27, 10.31s/it]" ] 
}, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2021/2500 [5:26:54<1:21:42, 10.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2022/2500 [5:27:03<1:19:26, 9.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2023/2500 [5:27:13<1:19:50, 10.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2024/2500 [5:27:24<1:20:21, 10.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2025/2500 [5:27:34<1:20:23, 10.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2026/2500 [5:27:44<1:18:56, 9.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2027/2500 [5:27:53<1:17:08, 9.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2028/2500 [5:28:01<1:14:06, 9.42s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2029/2500 [5:28:11<1:14:29, 9.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2030/2500 [5:28:26<1:26:18, 11.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2031/2500 [5:28:36<1:23:58, 10.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████▏ | 2032/2500 [5:28:46<1:22:20, 10.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████▏ | 2033/2500 [5:28:56<1:20:12, 10.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████▏ | 2034/2500 [5:29:05<1:17:49, 10.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████▏ | 2035/2500 [5:29:16<1:20:58, 10.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████▏ | 2036/2500 [5:29:27<1:20:42, 10.44s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████▏ | 2037/2500 [5:29:37<1:19:33, 10.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2038/2500 [5:29:47<1:18:16, 10.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2039/2500 [5:29:56<1:16:41, 9.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2040/2500 [5:30:06<1:15:34, 9.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2041/2500 [5:30:15<1:14:50, 9.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2042/2500 [5:30:26<1:15:46, 9.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2043/2500 [5:30:36<1:17:08, 10.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2044/2500 [5:30:46<1:16:11, 10.03s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2045/2500 [5:30:56<1:16:44, 10.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2046/2500 [5:31:08<1:19:43, 10.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2047/2500 [5:31:18<1:18:09, 10.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2048/2500 [5:31:27<1:15:43, 10.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2049/2500 [5:31:36<1:13:45, 9.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2050/2500 [5:31:49<1:20:35, 10.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2051/2500 [5:31:59<1:18:42, 10.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2052/2500 [5:32:10<1:19:19, 10.62s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2053/2500 [5:32:20<1:16:25, 10.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2054/2500 [5:32:29<1:15:09, 10.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2055/2500 [5:32:39<1:14:25, 10.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2056/2500 [5:32:48<1:11:47, 9.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2057/2500 [5:32:59<1:13:58, 10.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2058/2500 [5:33:09<1:13:27, 9.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2059/2500 [5:33:18<1:11:43, 9.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2070/2500 [5:35:10<1:19:57, 11.16s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2071/2500 [5:35:20<1:18:01, 10.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2072/2500 [5:35:29<1:12:52, 10.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2073/2500 [5:35:40<1:13:59, 10.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2074/2500 [5:35:50<1:12:56, 10.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2075/2500 [5:36:00<1:12:11, 10.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2076/2500 [5:36:11<1:14:23, 10.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2077/2500 [5:36:21<1:13:32, 10.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2078/2500 [5:36:32<1:13:16, 10.42s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2079/2500 [5:36:43<1:15:34, 10.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2080/2500 [5:36:55<1:17:49, 11.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2081/2500 [5:37:08<1:20:13, 11.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2082/2500 [5:37:18<1:17:24, 11.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2083/2500 [5:37:29<1:16:48, 11.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2084/2500 [5:37:39<1:15:34, 10.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2085/2500 [5:37:51<1:16:19, 11.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2086/2500 [5:38:00<1:13:23, 10.64s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2087/2500 [5:38:12<1:15:42, 11.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▎ | 2088/2500 [5:38:24<1:16:46, 11.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▎ | 2089/2500 [5:38:33<1:12:18, 10.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▎ | 2090/2500 [5:38:45<1:15:21, 11.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▎ | 2091/2500 [5:38:56<1:14:38, 10.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▎ | 2092/2500 [5:39:08<1:16:47, 11.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▎ | 2093/2500 [5:39:22<1:21:43, 12.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2094/2500 [5:39:32<1:17:54, 11.51s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2095/2500 [5:39:44<1:18:45, 11.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2096/2500 [5:39:55<1:18:14, 11.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2097/2500 [5:40:06<1:16:45, 11.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2098/2500 [5:40:16<1:13:37, 10.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2099/2500 [5:40:29<1:16:24, 11.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2100/2500 [5:40:40<1:15:52, 11.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2101/2500 [5:40:51<1:13:42, 11.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2102/2500 [5:41:01<1:11:38, 10.80s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2103/2500 [5:41:11<1:09:34, 10.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2104/2500 [5:41:21<1:09:02, 10.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2105/2500 [5:41:33<1:12:28, 11.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2106/2500 [5:41:43<1:10:05, 10.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2107/2500 [5:41:54<1:09:51, 10.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2108/2500 [5:42:04<1:09:24, 10.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2109/2500 [5:42:16<1:10:46, 10.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2110/2500 [5:42:26<1:09:59, 10.77s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2111/2500 [5:42:39<1:13:25, 11.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2112/2500 [5:42:50<1:12:47, 11.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2113/2500 [5:43:01<1:11:55, 11.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2114/2500 [5:43:11<1:09:15, 10.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2115/2500 [5:43:22<1:09:50, 10.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2116/2500 [5:43:31<1:06:02, 10.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2117/2500 [5:43:41<1:05:07, 10.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2118/2500 [5:43:51<1:05:38, 10.31s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2119/2500 [5:44:03<1:07:59, 10.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2120/2500 [5:44:13<1:05:44, 10.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2121/2500 [5:44:27<1:12:34, 11.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2122/2500 [5:44:40<1:15:39, 12.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2123/2500 [5:44:53<1:17:22, 12.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2124/2500 [5:45:06<1:19:02, 12.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2125/2500 [5:45:18<1:17:27, 12.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2126/2500 [5:45:30<1:16:31, 12.28s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2127/2500 [5:45:43<1:16:41, 12.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2128/2500 [5:45:56<1:18:44, 12.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2129/2500 [5:46:09<1:18:40, 12.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2130/2500 [5:46:21<1:18:05, 12.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2131/2500 [5:46:33<1:16:39, 12.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2132/2500 [5:46:44<1:13:12, 11.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2133/2500 [5:46:57<1:15:09, 12.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2134/2500 [5:47:10<1:16:23, 12.52s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2135/2500 [5:47:22<1:15:04, 12.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2136/2500 [5:47:33<1:11:34, 11.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2137/2500 [5:47:43<1:09:13, 11.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2138/2500 [5:47:55<1:10:10, 11.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2139/2500 [5:48:07<1:09:24, 11.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2140/2500 [5:48:19<1:11:23, 11.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2141/2500 [5:48:30<1:09:02, 11.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2142/2500 [5:48:42<1:09:50, 11.71s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2143/2500 [5:48:54<1:10:34, 11.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2144/2500 [5:49:07<1:11:28, 12.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2145/2500 [5:49:19<1:10:51, 11.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2146/2500 [5:49:31<1:11:22, 12.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2147/2500 [5:49:44<1:11:58, 12.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2148/2500 [5:49:56<1:11:55, 12.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2149/2500 [5:50:08<1:12:02, 12.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2150/2500 [5:50:21<1:12:37, 12.45s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2151/2500 [5:50:36<1:15:41, 13.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2152/2500 [5:50:49<1:15:57, 13.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2153/2500 [5:51:01<1:13:50, 12.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2154/2500 [5:51:13<1:11:56, 12.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2155/2500 [5:51:25<1:11:28, 12.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2156/2500 [5:51:37<1:09:42, 12.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▋ | 2157/2500 [5:51:49<1:09:41, 12.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▋ | 2158/2500 [5:51:59<1:06:56, 11.74s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▋ | 2159/2500 [5:52:12<1:08:33, 12.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▋ | 2160/2500 [5:52:24<1:08:11, 12.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▋ | 2161/2500 [5:52:38<1:10:39, 12.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▋ | 2162/2500 [5:52:50<1:09:26, 12.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2163/2500 [5:53:02<1:09:20, 12.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2164/2500 [5:53:14<1:08:47, 12.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2165/2500 [5:53:27<1:08:58, 12.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2166/2500 [5:53:39<1:08:52, 12.37s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2167/2500 [5:53:52<1:09:13, 12.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2168/2500 [5:54:05<1:09:21, 12.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2169/2500 [5:54:17<1:08:05, 12.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2170/2500 [5:54:29<1:07:27, 12.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2171/2500 [5:54:41<1:07:56, 12.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2172/2500 [5:54:53<1:07:09, 12.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2173/2500 [5:55:06<1:06:51, 12.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2174/2500 [5:55:18<1:06:26, 12.23s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2175/2500 [5:55:30<1:05:58, 12.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2176/2500 [5:55:41<1:04:46, 11.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2177/2500 [5:55:55<1:07:09, 12.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2178/2500 [5:56:09<1:09:09, 12.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2179/2500 [5:56:22<1:09:20, 12.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2180/2500 [5:56:35<1:09:43, 13.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2181/2500 [5:56:46<1:06:29, 12.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2182/2500 [5:56:59<1:05:53, 12.43s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2183/2500 [5:57:10<1:03:39, 12.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2184/2500 [5:57:23<1:05:26, 12.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2185/2500 [5:57:36<1:05:45, 12.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2186/2500 [5:57:48<1:05:12, 12.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2187/2500 [5:58:00<1:04:18, 12.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2188/2500 [5:58:13<1:05:05, 12.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2189/2500 [5:58:26<1:05:40, 12.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2190/2500 [5:58:40<1:06:28, 12.87s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2191/2500 [5:58:52<1:04:54, 12.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2192/2500 [5:59:04<1:04:54, 12.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2193/2500 [5:59:16<1:03:38, 12.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2194/2500 [5:59:29<1:04:20, 12.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2195/2500 [5:59:42<1:04:41, 12.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2196/2500 [5:59:54<1:03:10, 12.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2197/2500 [6:00:07<1:02:57, 12.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2198/2500 [6:00:20<1:03:38, 12.64s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2199/2500 [6:00:32<1:02:58, 12.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2200/2500 [6:00:44<1:02:32, 12.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2201/2500 [6:00:58<1:04:24, 12.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2202/2500 [6:01:10<1:02:55, 12.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2203/2500 [6:01:23<1:01:59, 12.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2204/2500 [6:01:34<1:00:50, 12.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2205/2500 [6:01:48<1:02:03, 12.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2206/2500 [6:02:00<1:01:34, 12.57s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2207/2500 [6:02:14<1:02:37, 12.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2208/2500 [6:02:28<1:04:12, 13.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2209/2500 [6:02:40<1:02:07, 12.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2210/2500 [6:02:51<59:40, 12.35s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2211/2500 [6:03:04<1:00:01, 12.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2212/2500 [6:03:18<1:02:09, 12.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▊ | 2213/2500 [6:03:29<1:00:24, 12.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▊ | 2214/2500 [6:03:43<1:00:54, 12.78s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▊ | 2215/2500 [6:03:56<1:01:39, 12.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▊ | 2216/2500 [6:04:08<1:00:09, 12.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▊ | 2217/2500 [6:04:22<1:02:11, 13.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▊ | 2218/2500 [6:04:36<1:01:51, 13.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2219/2500 [6:04:48<1:01:19, 13.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2220/2500 [6:05:01<1:00:56, 13.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2221/2500 [6:05:15<1:01:17, 13.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2222/2500 [6:05:30<1:03:56, 13.80s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2223/2500 [6:05:43<1:03:00, 13.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2224/2500 [6:05:57<1:03:10, 13.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2225/2500 [6:06:13<1:04:53, 14.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2226/2500 [6:06:26<1:03:31, 13.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2227/2500 [6:06:39<1:02:18, 13.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2228/2500 [6:06:53<1:01:46, 13.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2229/2500 [6:07:06<1:01:12, 13.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2230/2500 [6:07:19<1:00:57, 13.55s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2231/2500 [6:07:33<1:01:01, 13.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2232/2500 [6:07:47<1:00:36, 13.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2233/2500 [6:07:59<58:17, 13.10s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2234/2500 [6:08:11<57:10, 12.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2235/2500 [6:08:25<57:47, 13.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2236/2500 [6:08:37<56:05, 12.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2237/2500 [6:08:51<57:41, 13.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2238/2500 [6:09:04<57:13, 13.11s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2239/2500 [6:09:16<56:24, 12.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2240/2500 [6:09:31<58:20, 13.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2241/2500 [6:09:43<56:47, 13.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2242/2500 [6:09:57<57:29, 13.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2243/2500 [6:10:11<57:06, 13.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2244/2500 [6:10:24<57:00, 13.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2245/2500 [6:10:37<56:16, 13.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2246/2500 [6:10:49<55:13, 13.04s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2247/2500 [6:11:03<55:11, 13.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2248/2500 [6:11:18<57:53, 13.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2249/2500 [6:11:31<56:28, 13.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2250/2500 [6:11:45<56:45, 13.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2251/2500 [6:11:58<56:22, 13.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2252/2500 [6:12:11<54:42, 13.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2253/2500 [6:12:24<54:48, 13.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2254/2500 [6:12:37<54:20, 13.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2255/2500 [6:12:51<54:44, 13.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2256/2500 [6:13:04<53:43, 13.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2257/2500 [6:13:17<53:31, 13.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2258/2500 [6:13:31<54:30, 13.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2259/2500 [6:13:44<53:21, 13.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2260/2500 [6:13:58<53:44, 13.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2261/2500 [6:14:12<54:36, 13.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2262/2500 [6:14:35<1:04:45, 16.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 
1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2263/2500 [6:14:47<1:00:12, 15.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2264/2500 [6:15:00<57:25, 14.60s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2265/2500 [6:15:29<1:13:41, 18.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2266/2500 [6:15:46<1:10:48, 18.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2267/2500 [6:16:02<1:07:51, 17.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2268/2500 [6:16:15<1:02:43, 16.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2269/2500 [6:16:28<59:25, 15.43s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2270/2500 [6:16:41<55:33, 14.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 
0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2271/2500 [6:16:54<54:07, 14.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2272/2500 [6:17:07<51:42, 13.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2273/2500 [6:17:21<52:15, 13.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2274/2500 [6:17:34<51:11, 13.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2275/2500 [6:17:47<50:38, 13.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2276/2500 [6:17:59<48:54, 13.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2277/2500 [6:18:12<47:58, 12.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2278/2500 [6:18:24<47:20, 12.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2279/2500 [6:18:37<47:23, 12.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2280/2500 [6:18:50<47:17, 12.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2281/2500 [6:19:05<49:13, 13.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████▏| 2282/2500 [6:19:18<48:41, 13.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████▏| 2283/2500 [6:19:32<48:20, 13.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████▏| 2284/2500 [6:19:44<47:19, 13.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████▏| 2285/2500 [6:19:57<46:54, 13.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████▏| 2286/2500 [6:20:11<47:34, 13.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": 
"stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████▏| 2287/2500 [6:20:25<47:52, 13.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2288/2500 [6:20:38<47:30, 13.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2289/2500 [6:20:52<47:48, 13.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2290/2500 [6:21:06<47:26, 13.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2291/2500 [6:21:19<46:35, 13.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2292/2500 [6:21:31<45:10, 13.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2293/2500 [6:21:44<45:14, 13.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2294/2500 [6:21:57<44:22, 12.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2295/2500 [6:22:10<44:21, 12.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2296/2500 [6:22:23<44:00, 12.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2297/2500 [6:22:37<44:59, 13.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2298/2500 [6:22:50<44:48, 13.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2299/2500 [6:23:04<44:42, 13.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2300/2500 [6:23:16<43:54, 13.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2301/2500 [6:23:30<43:41, 13.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2302/2500 [6:23:44<44:52, 13.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2303/2500 [6:23:57<44:16, 13.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2304/2500 [6:24:11<43:58, 13.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2305/2500 [6:24:24<43:36, 13.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2306/2500 [6:24:38<44:18, 13.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2307/2500 [6:24:52<44:00, 13.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2308/2500 [6:25:07<44:42, 13.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2309/2500 [6:25:20<43:54, 13.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2310/2500 [6:25:33<42:47, 13.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": 
[ "\r", "Evaluating workflow: 92%|█████████▏| 2311/2500 [6:25:47<42:48, 13.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2312/2500 [6:26:01<43:17, 13.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2313/2500 [6:26:16<43:50, 14.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2314/2500 [6:26:29<42:31, 13.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2315/2500 [6:26:41<41:10, 13.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2316/2500 [6:26:55<41:45, 13.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2317/2500 [6:27:07<39:25, 12.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2318/2500 [6:27:18<37:59, 12.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating 
workflow: 93%|█████████▎| 2319/2500 [6:27:30<37:02, 12.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2320/2500 [6:27:42<37:02, 12.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2321/2500 [6:27:55<36:36, 12.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2322/2500 [6:28:07<36:19, 12.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2323/2500 [6:28:18<35:36, 12.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2324/2500 [6:28:31<35:41, 12.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2325/2500 [6:28:42<34:57, 11.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2326/2500 [6:28:54<34:37, 11.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
93%|█████████▎| 2327/2500 [6:29:08<35:47, 12.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2328/2500 [6:29:21<36:04, 12.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2329/2500 [6:29:32<35:02, 12.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2330/2500 [6:29:43<33:19, 11.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2331/2500 [6:29:54<32:21, 11.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2332/2500 [6:30:05<32:10, 11.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2333/2500 [6:30:17<32:09, 11.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2334/2500 [6:30:30<33:03, 11.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 
2335/2500 [6:30:41<32:00, 11.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2336/2500 [6:30:52<31:36, 11.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2337/2500 [6:31:03<31:15, 11.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▎| 2338/2500 [6:31:15<31:04, 11.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▎| 2339/2500 [6:31:29<33:14, 12.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▎| 2340/2500 [6:31:42<33:22, 12.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▎| 2341/2500 [6:31:57<34:56, 13.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▎| 2342/2500 [6:32:07<32:11, 12.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▎| 2343/2500 
[6:32:20<32:50, 12.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2344/2500 [6:32:33<32:24, 12.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2345/2500 [6:32:44<31:43, 12.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2346/2500 [6:32:56<31:02, 12.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2347/2500 [6:33:09<31:31, 12.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2348/2500 [6:33:20<30:19, 11.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2349/2500 [6:33:33<30:41, 12.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2350/2500 [6:33:44<29:58, 11.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2351/2500 [6:33:59<31:45, 
12.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2352/2500 [6:34:11<30:41, 12.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2353/2500 [6:34:24<31:02, 12.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2354/2500 [6:34:37<30:56, 12.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2355/2500 [6:34:48<29:35, 12.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2356/2500 [6:35:01<30:17, 12.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2357/2500 [6:35:13<29:09, 12.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2358/2500 [6:35:24<28:10, 11.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2359/2500 [6:35:36<27:53, 11.87s/it]" ] }, 
{ "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2360/2500 [6:35:47<27:10, 11.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2361/2500 [6:36:00<28:12, 12.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2362/2500 [6:36:13<28:31, 12.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2363/2500 [6:36:26<29:01, 12.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2364/2500 [6:36:39<28:29, 12.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2365/2500 [6:36:50<27:21, 12.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2366/2500 [6:37:02<27:20, 12.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2367/2500 [6:37:14<26:39, 12.03s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2368/2500 [6:37:28<27:45, 12.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2369/2500 [6:37:40<26:58, 12.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2370/2500 [6:37:51<26:03, 12.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2371/2500 [6:38:01<24:54, 11.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2372/2500 [6:38:13<24:35, 11.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2373/2500 [6:38:25<24:55, 11.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2374/2500 [6:38:37<24:43, 11.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981570.093020122)])']\n", 
"connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981582.139214875)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981605.327578615)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 981615.096470968)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 981625.774185563)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981648.221628504)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981658.665721932)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981668.556681717)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981679.778411061)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981689.729572184)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981700.569046928)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 981710.287735581)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981729.308838175)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 981739.176734872)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 
981749.477090303)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981759.672435344)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981769.7938206)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981780.123851498)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 981790.381197494)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981811.046624695)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981821.883165014)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981831.968867333)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981801.425494685)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981842.745343019)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 981853.098042144)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 981863.178865182)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 981873.432306473)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981893.151105894)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client 
session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981903.184881794)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981883.696149279)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 981913.988005867)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981923.178447191)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981933.657322869)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981944.652424418)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981954.86392122)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981985.303673576)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981965.136217733)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 981975.607736981)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 981996.127285012)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982007.719879243)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982027.481565173)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982017.523357586)])']\n", "connector: \n", 
"Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982047.965603386)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982058.257559289)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982068.206443385)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982092.803712531)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 982102.626867558)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 982113.012625328)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982133.376179112)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982154.322499594)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982123.964777203)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 982143.723326073)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982196.913182279)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982164.14526252)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 982174.154675839)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 982184.720543699)])']\n", 
"connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982227.151145401)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982236.703946798)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982206.466975972)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 982216.86762846)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982259.00432302)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982269.415424231)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982247.577467152)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 982279.472031183)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982309.189659817)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982319.639792578)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982289.633058146)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 982299.910409079)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982362.706057763)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", 
"Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982330.01176377)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 982341.608440367)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 982352.557059882)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982383.295563553)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982373.298300267)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 982393.686175244)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982413.670566502)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982423.951240397)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982433.56214623)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982451.652912988)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982461.950290351)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982470.852813607)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982506.369914316)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982488.032135885)])']\n", 
"connector: \n", "Unclosed connector\n", "connections: ['deque([(, 982497.225781453)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982526.419533679)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982544.189791435)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982517.61936392)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 982535.432291932)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982554.129110248)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 982562.382355806)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 982572.099459232)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 982581.147805374)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982599.482599998)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982609.159732824)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982618.290997389)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982590.142706485)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
982638.158875301)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982656.499055057)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982627.060202238)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 982646.584575451)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982665.48431521)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982675.736575823)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982686.234886243)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982714.183542705)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982723.241569974)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982705.410975571)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982753.211481294)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982743.193833299)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 982762.761813447)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982771.657413159)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", 
"connections: ['deque([(, 982790.245298871)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982799.214913649)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982780.518261411)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982808.889129992)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982843.768207108)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982822.210195956)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 982832.121812768)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982855.494070042)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982864.883996271)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982874.196616863)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982911.921340499)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982892.630015742)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 982902.134214958)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982921.29430002)])']\n", "connector: \n", 
"Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982930.279385254)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982941.215506807)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982950.862240159)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982959.524321917)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 982970.342716732)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 982979.846398113)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 982989.934842358)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 982999.494316526)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983010.35706969)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983020.171719457)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983029.540230747)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983038.43989111)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983047.921290627)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983057.622974843)])']\n", 
"connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983076.888272316)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983086.976542011)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 983097.073076849)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983116.173145378)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983125.166251778)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983134.399573613)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983144.155588431)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983152.986902378)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983162.226266407)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983170.891068005)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983180.343242517)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983190.216462646)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983210.50093828)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", 
"Unclosed connector\n", "connections: ['deque([(, 983200.809335628)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983220.313939708)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983239.37754749)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983248.387462133)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983230.235923041)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983257.230980907)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983276.514188488)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983266.837254181)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 983285.637973761)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983294.625658024)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983304.021574294)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983313.944324601)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983322.987460922)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983332.235396271)])']\n", "connector: \n", "Unclosed client session\n", 
"client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983351.423967268)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983342.180548214)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983360.436932277)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 983370.061436739)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983380.089815193)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983389.099531708)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983399.35227437)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983408.749634672)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 983418.111383614)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983437.255516813)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983465.487159946)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983446.592221191)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 983455.590956813)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: 
['deque([(, 983482.935581114)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983503.868692454)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983474.500129405)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 983493.037211428)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983512.931106859)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983520.689097265)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983529.710475092)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983541.850937054)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 981594.988170123)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 981637.471148723)])']\n", "connector: \n", "Unclosed connector\n", 
"connections: ['deque([(, 981719.463061657)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 982037.122091574)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 982081.398737002)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 982403.408649395)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 982442.866221647)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 982479.272731216)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 982695.518926602)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 982733.412454116)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 982883.537960138)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 983067.543970207)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 983106.31414675)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 983428.366858989)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983569.742965252)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983551.58928218)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 983560.021666351)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 983579.82830742)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983589.578859278)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983610.329788686)])']\n", 
"connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983620.08298326)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983600.76698648)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983660.666369845)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 983640.47566501)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983630.809417184)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 983651.045687976)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983683.63677726)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983692.068423748)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983674.329889698)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983701.865115931)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983711.878592421)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983733.012305106)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983722.501468062)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
983773.214403365)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983742.644726666)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 983753.699942754)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 983763.046270601)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983782.895937956)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983794.729684081)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983813.077617277)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983803.963148677)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983822.367387772)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983832.158250275)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983852.48227466)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983841.710518613)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983860.909649895)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 
983870.936411155)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 983879.663356771)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983889.732693973)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983899.687388583)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983919.02592583)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983910.363979535)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983929.273638596)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983938.685087502)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983948.133207381)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983957.294261351)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983966.783044337)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983975.512824449)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983987.882056441)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 983998.178351757)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984005.877367489)])']\n", 
"connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984015.000200483)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984024.420484103)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984033.780938142)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984043.498688234)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984052.339318805)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984061.133904376)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984070.736512927)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984079.724728194)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984088.734153768)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 984097.115498924)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984105.950651209)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984116.152536091)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984133.879646983)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", 
"Unclosed connector\n", "connections: ['deque([(, 984125.160776198)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984157.397790355)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984142.370968822)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 984167.650112676)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984189.69525516)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984198.867328901)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984177.395186245)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 984210.230095184)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984219.913990112)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984239.664467925)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984230.078017376)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984248.811797879)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984257.47074632)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 984265.82880637)])']\n", 
"connector: \n", "Unclosed connector\n", "connections: ['deque([(, 984276.20711949)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984295.784488438)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984304.802289963)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984285.988281807)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 984314.099834887)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984332.62960084)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984340.875233245)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984323.625755523)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 984350.118586725)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984370.937461941)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984381.921556306)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984360.094115918)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984390.42575069)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
984400.106337489)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984409.265412472)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984418.427884099)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984427.146854553)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 984436.343128196)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 984446.333583218)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984455.798206531)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984485.999216237)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984465.766445303)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 984474.932363408)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984495.45819849)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984506.690724926)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984517.245854605)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984533.065112522)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", 
"connections: ['deque([(, 984564.359039672)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984543.034523515)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 984554.056073035)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984574.082065694)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984593.993900105)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984583.565184935)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 984604.769797519)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984614.22718518)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984623.79841771)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984633.263694744)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984642.699355027)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984661.650667731)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984679.038252697)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: 
['deque([(, 984652.752149204)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 984670.391729482)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984689.300449717)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984698.40232495)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984707.5946906)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984717.143139218)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984727.433502671)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984737.451008795)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984746.429290049)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984754.922104444)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984768.503168095)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984778.919399949)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 984788.589621273)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984807.251991744)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984797.887007891)])']\n", 
"connector: \n", "Unclosed connector\n", "connections: ['deque([(, 984816.581806171)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984853.442929051)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984825.579477032)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 984834.537354257)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 984843.705258616)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984862.265197805)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984892.539818285)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984871.81202117)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 984882.438056859)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984902.949801012)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984932.579078733)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984912.966059096)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 984922.711103895)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
984942.16990007)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984961.161475508)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984970.405007595)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984951.752861694)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984979.683339103)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 984990.550864254)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985000.220422443)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985009.058521985)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985017.982158403)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985026.70609553)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985046.186446096)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985036.309741774)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985057.237916308)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 985067.438761868)])']\n", "connector: \n", "Unclosed connector\n", 
"connections: ['deque([(, 985076.606103611)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985086.898510919)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985106.431743691)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985116.914597727)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985096.658654732)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985145.866747481)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985126.38261775)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 985135.996665065)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 985156.398981507)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 985168.106235759)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985178.897584571)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985189.416254426)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985199.507517987)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985231.531075398)])']\n", "connector: \n", 
"Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985210.088723257)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 985221.522863872)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 985241.966380305)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985251.978296742)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985261.808746863)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985272.35182244)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985282.183867775)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985303.12818081)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985313.664833233)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985292.261151894)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985323.190971628)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985352.056653275)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985332.92525451)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 985342.563858667)])']\n", 
"connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985362.309990739)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985391.629102491)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985371.338150859)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 985381.497437148)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985401.710360538)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985413.482899879)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985423.20871016)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 985433.46061082)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985454.247728275)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 985475.710818163)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985443.828651708)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 985463.98934926)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985486.398009073)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", 
"Unclosed connector\n", "connections: ['deque([(, 985495.674089582)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985506.068388516)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985515.202835007)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985524.696846543)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985534.243763392)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 985545.311806957)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985555.968272326)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985587.290270361)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985566.089838508)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 985577.037927832)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985629.767599213)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985610.453399936)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985600.188591282)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 985620.42969482)])']\n", "connector: \n", "Unclosed client session\n", 
"client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985659.011614097)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985639.810197987)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 985648.999252847)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 985669.272612027)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985688.88063966)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985698.97897784)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985708.70414048)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985679.082516577)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985738.749584745)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985748.160987315)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985718.597898232)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 985729.063156066)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985761.501330831)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
985781.314742481)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985771.065366776)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985792.437911848)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985812.55745833)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985822.015506108)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985802.447945209)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985844.754863509)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985832.989141093)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 985853.988507278)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 985864.639203934)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985883.920318632)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985874.203579803)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 985893.43441623)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 985902.512445062)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", 
"connections: ['deque([(, 985913.022098532)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985923.586826969)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985933.709549205)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985944.126010715)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985956.461388358)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985966.838446188)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 985976.437464412)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986005.544974973)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986015.102741925)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 985985.3275298)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 985995.011448676)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 986024.665970016)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986036.27997989)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: 
['deque([(, 986045.91196039)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 986055.973448395)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986066.139871475)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986085.320698167)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986096.90218293)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986106.810971947)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986116.714206026)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986126.032186413)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986135.78726937)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986145.234890474)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986157.114407226)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986167.246516749)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986177.808344466)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986209.025761905)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986219.06223647)])']\n", 
"connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986187.733099745)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 986197.673639249)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986233.294880415)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986244.315298138)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986254.225669448)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986264.61497285)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986285.35853782)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986275.207467907)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 986294.428732318)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986305.289105188)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986314.621455308)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 986323.951669488)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986333.314754587)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", 
"Unclosed connector\n", "connections: ['deque([(, 986343.860564572)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986353.057122018)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986362.554731007)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986372.854212073)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986382.507321088)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986392.522303775)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986402.383706911)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 986411.652671749)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986443.229171407)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986453.534332425)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986421.257254547)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 986433.18855793)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986463.549028795)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986484.196050013)])']\n", "connector: \n", "Unclosed client session\n", 
"client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986493.549461032)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986474.12769746)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986514.09323009)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986524.302885663)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986503.761564975)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 986533.919101628)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986551.794013191)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986543.223558164)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 986561.440089133)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 986576.029294897)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986586.125784191)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986596.252070823)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 986605.96958241)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
986615.322632312)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986626.768328903)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986637.180526618)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986647.193088272)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986657.020154564)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986685.751217809)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986666.575423841)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 986676.146061495)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986696.016431557)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986706.611890036)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986716.401412431)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 986726.738214272)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986738.249341432)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986748.171316709)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", 
"client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986757.524959692)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 986766.778680523)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986789.686919481)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986800.55709239)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986779.70331744)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 986809.964074683)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986819.733016237)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986849.275812573)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986829.59128866)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 986838.515771666)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986868.39879199)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986859.139427013)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 986878.506226612)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 986888.347955065)])']\n", "connector: \n", 
"Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986906.247767287)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986915.241432681)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986897.35385042)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986924.937126763)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986934.510786987)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986955.363885155)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986944.981228524)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986968.87096981)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986980.361721139)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 986990.705192262)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 986999.293145924)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987010.116575333)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987020.097748661)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", 
"connections: ['deque([(, 987030.103129468)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 987041.410257044)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987051.620913595)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987085.527488844)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987062.006943758)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 987073.602295728)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987108.112643219)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987129.568418011)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987097.878693973)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 987119.020455589)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987140.917681846)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987150.623016096)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987162.469048126)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987174.07656957)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"Unclosed connector\n", "connections: ['deque([(, 987183.169008681)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987195.299807304)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987206.070415124)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987218.160698651)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987231.974454534)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987242.240973812)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987254.268930274)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987286.722110219)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987299.193091173)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987265.780021342)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 987276.760323656)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987310.454562577)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987320.844496308)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987330.983595782)])']\n", "connector: \n", "Unclosed client session\n", 
"client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987340.830985772)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987351.166859808)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987363.450358327)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 987373.349008389)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987394.520110674)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987405.935084269)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987416.486180911)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987383.992230811)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987429.110768279)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987440.205965738)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987451.114009797)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987460.975259343)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987501.626100372)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", 
"Unclosed connector\n", "connections: ['deque([(, 987472.139846017)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 987481.13522227)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 987491.064415482)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987522.880268266)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987536.957169693)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987513.264374759)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987550.17672635)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987576.510488997)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987563.202322854)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 987588.396630339)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987600.395348733)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987612.870099109)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987626.42043666)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987639.197908079)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", 
"connections: ['deque([(, 987651.719691313)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987674.421768461)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987687.532491208)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987663.719104512)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987700.602965051)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987712.519960164)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 987723.049287431)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 987733.657702196)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987745.730027293)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987757.044273808)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987769.787185228)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987780.48937684)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987792.583429395)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987804.812293678)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", 
"Unclosed connector\n", "connections: ['deque([(, 987817.290315333)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987829.104837401)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987841.485401388)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987866.354106677)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987854.036381566)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987878.803342843)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987905.886369291)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987919.180748316)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987891.56255321)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987931.180041778)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987942.972036434)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987955.301829162)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987966.82677702)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
987989.793667282)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 988002.599284642)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 988014.562327073)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 987979.089202221)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 988028.173643586)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 988052.476194042)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 988064.617125605)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 988040.082712614)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 988102.257750401)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 988114.930155701)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 988077.135239193)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 988089.544509743)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 988126.83135025)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 988138.915194553)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", 
"connections: ['deque([(, 988151.59419097)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 988163.63145651)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 988187.999638845)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 988200.061411792)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 988175.856896783)])']\n", "connector: \n", "Evaluating workflow: 95%|█████████▌| 2375/2500 [6:38:52<26:41, 12.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2376/2500 [6:39:05<26:16, 12.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2377/2500 [6:39:17<25:50, 12.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2378/2500 [6:39:29<25:19, 12.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2379/2500 [6:39:40<24:05, 11.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2380/2500 
[6:39:53<24:29, 12.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2381/2500 [6:40:03<23:20, 11.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2382/2500 [6:40:16<23:38, 12.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2383/2500 [6:40:32<25:58, 13.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2384/2500 [6:40:47<26:44, 13.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2385/2500 [6:41:00<26:03, 13.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2386/2500 [6:41:13<25:12, 13.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2387/2500 [6:41:26<24:47, 13.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2388/2500 [6:41:37<23:42, 
12.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2389/2500 [6:41:49<23:04, 12.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2390/2500 [6:42:01<22:35, 12.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2391/2500 [6:42:19<25:01, 13.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2392/2500 [6:42:31<23:58, 13.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2393/2500 [6:42:44<23:32, 13.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2394/2500 [6:42:56<22:47, 12.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2395/2500 [6:43:08<22:02, 12.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2396/2500 [6:43:21<22:11, 12.80s/it]" ] }, { 
"name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2397/2500 [6:43:34<21:46, 12.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2398/2500 [6:43:45<21:06, 12.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2399/2500 [6:43:57<20:38, 12.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2400/2500 [6:44:11<21:04, 12.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2401/2500 [6:44:22<20:13, 12.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2402/2500 [6:44:33<19:30, 11.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2403/2500 [6:44:46<19:52, 12.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2404/2500 [6:45:00<20:21, 12.72s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2405/2500 [6:45:13<20:03, 12.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2406/2500 [6:45:26<20:19, 12.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▋| 2407/2500 [6:45:38<19:40, 12.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▋| 2408/2500 [6:45:52<19:54, 12.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▋| 2409/2500 [6:46:04<19:08, 12.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▋| 2410/2500 [6:46:16<18:35, 12.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▋| 2411/2500 [6:46:28<18:24, 12.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▋| 2412/2500 [6:46:48<21:27, 14.63s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2413/2500 [6:47:01<20:36, 14.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2414/2500 [6:47:15<20:17, 14.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2415/2500 [6:47:28<19:37, 13.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2416/2500 [6:47:41<18:58, 13.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2417/2500 [6:47:54<18:13, 13.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2418/2500 [6:48:07<18:02, 13.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2419/2500 [6:48:20<17:56, 13.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2420/2500 [6:48:33<17:30, 13.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2421/2500 [6:48:46<17:17, 13.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2422/2500 [6:49:00<17:17, 13.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2423/2500 [6:49:12<16:42, 13.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2424/2500 [6:49:26<16:38, 13.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2425/2500 [6:49:38<16:15, 13.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2426/2500 [6:49:51<15:52, 12.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2427/2500 [6:50:04<15:42, 12.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2428/2500 [6:50:17<15:32, 12.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 
'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2429/2500 [6:50:31<15:43, 13.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2430/2500 [6:50:44<15:24, 13.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2431/2500 [6:50:57<15:08, 13.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2432/2500 [6:51:09<14:35, 12.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2433/2500 [6:51:24<14:49, 13.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2434/2500 [6:51:36<14:16, 12.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2435/2500 [6:51:49<13:58, 12.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2436/2500 [6:52:01<13:43, 12.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2437/2500 [6:52:13<13:07, 12.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2438/2500 [6:52:26<12:59, 12.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2439/2500 [6:52:40<13:12, 12.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2440/2500 [6:52:53<13:11, 13.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2441/2500 [6:53:06<12:52, 13.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2442/2500 [6:53:20<12:43, 13.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2443/2500 [6:53:40<14:40, 15.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2444/2500 [6:53:53<13:36, 14.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2445/2500 [6:54:07<13:12, 14.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2446/2500 [6:54:21<12:51, 14.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2447/2500 [6:54:32<11:53, 13.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2448/2500 [6:54:45<11:24, 13.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2449/2500 [6:54:58<11:15, 13.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2450/2500 [6:55:12<11:08, 13.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2451/2500 [6:55:25<10:52, 13.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2452/2500 [6:55:39<10:46, 13.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2453/2500 [6:55:52<10:19, 13.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2454/2500 [6:56:05<10:11, 13.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2455/2500 [6:56:19<10:01, 13.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2456/2500 [6:56:32<09:49, 13.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2457/2500 [6:56:46<09:38, 13.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2458/2500 [6:56:58<09:17, 13.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2459/2500 [6:57:11<08:54, 13.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2460/2500 [6:57:24<08:46, 13.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2461/2500 [6:57:38<08:37, 13.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2462/2500 [6:57:51<08:20, 13.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▊| 2463/2500 [6:58:04<08:01, 13.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▊| 2464/2500 [6:58:15<07:32, 12.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▊| 2465/2500 [6:58:27<07:12, 12.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▊| 2466/2500 [6:58:40<07:09, 12.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▊| 2467/2500 [6:58:53<06:55, 12.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▊| 2468/2500 [6:59:06<06:47, 12.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": 
[ "\r", "Evaluating workflow: 99%|█████████▉| 2469/2500 [6:59:20<06:44, 13.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2470/2500 [6:59:33<06:32, 13.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2471/2500 [6:59:46<06:21, 13.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2472/2500 [7:00:01<06:25, 13.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2473/2500 [7:00:15<06:08, 13.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2474/2500 [7:00:28<05:50, 13.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2475/2500 [7:00:40<05:25, 13.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2476/2500 [7:00:53<05:11, 12.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", 
"Evaluating workflow: 99%|█████████▉| 2477/2500 [7:01:06<05:01, 13.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2478/2500 [7:01:20<04:52, 13.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2479/2500 [7:01:32<04:34, 13.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2480/2500 [7:01:45<04:17, 12.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2481/2500 [7:01:57<04:03, 12.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2482/2500 [7:02:10<03:49, 12.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2483/2500 [7:02:24<03:41, 13.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2484/2500 [7:02:36<03:25, 12.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
99%|█████████▉| 2485/2500 [7:02:49<03:11, 12.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2486/2500 [7:03:02<03:01, 12.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2487/2500 [7:03:14<02:45, 12.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2488/2500 [7:03:27<02:34, 12.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2489/2500 [7:03:40<02:21, 12.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2490/2500 [7:03:53<02:08, 12.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2491/2500 [7:04:06<01:56, 12.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2492/2500 [7:04:21<01:47, 13.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 
2493/2500 [7:04:34<01:33, 13.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2494/2500 [7:04:49<01:22, 13.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2495/2500 [7:05:02<01:08, 13.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2496/2500 [7:05:16<00:54, 13.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2497/2500 [7:05:28<00:39, 13.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2498/2500 [7:05:43<00:27, 13.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2499/2500 [7:05:57<00:13, 13.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 2500/2500 [7:06:12<00:00, 10.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "Evaluation metrics: {'f1': 0.174, 'em': 0.174, 'acc': 0.1748}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\n" ] } ], "source": [ "\n", "optimizer.calltime = 1\n", "optimizer.collate_func = collate_func\n", "\n", "benchmark.error_list = {}\n", "benchmark.timeout = 900\n", "benchmark.dataname = 'pubmedxqa'\n", "optimizer.evaluator.dataname = 'hotpotqa'\n", "with suppress_logger_info():\n", " metrics = optimizer.evaluate(dataset=benchmark, eval_mode=\"test\")\n", "print(\"Evaluation metrics: \", metrics)" ] }, { "cell_type": "code", "execution_count": 11, "id": "56c1de4a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'f1': 0.1388, 'em': 0.1388, 'acc': 0.168}" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "metrics" ] }, { "cell_type": "code", "execution_count": 1, "id": "2d323337", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/site-packages/PyPDF2/__init__.py:21: DeprecationWarning: PyPDF2 is deprecated. Please move to the pypdf library instead.\n", " warnings.warn(\n" ] } ], "source": [ "import os\n", "\n", "from dotenv import load_dotenv\n", "\n", "from evoagentx.agents.agent_manager import AgentManager\n", "from evoagentx.benchmark import HotPotQA\n", "from evoagentx.core.callbacks import suppress_logger_info\n", "from evoagentx.core.logging import logger\n", "from evoagentx.evaluators import Evaluator\n", "from evoagentx.models import OpenAILLM, OpenAILLMConfig\n", "from evoagentx.optimizers import TextGradOptimizer\n", "from evoagentx.prompts import StringTemplate\n", "from evoagentx.workflow import SequentialWorkFlowGraph\n", "from dotenv import load_dotenv\n", "\n", "from evoagentx.agents.agent_manager import AgentManager\n", "from evoagentx.benchmark import MBPP\n", "from evoagentx.core.callbacks import suppress_logger_info\n", "from evoagentx.core.logging import logger\n", "from evoagentx.evaluators import Evaluator\n", "from evoagentx.models import OpenAILLM, OpenAILLMConfig\n", "from evoagentx.optimizers 
import TextGradOptimizer\n", "from evoagentx.prompts import StringTemplate\n", "from evoagentx.workflow import SequentialWorkFlowGraph\n", "\n", "from evoagentx.models import OpenAILLMConfig, OpenAILLM\n", "from evoagentx.workflow import SEWWorkFlowGraph, STRUCTUREWorkFlowGraph\n", "from evoagentx.agents import AgentManager\n", "from evoagentx.benchmark import HumanEval,AFlowMBPP\n", "from evoagentx.evaluators import Evaluator \n", "from evoagentx.optimizers import SEWOptimizer, STRUCTUREOptimizer\n", "from evoagentx.optimizers.structure_optimizer import STRUCTUREWorkFlowScheme\n", "from evoagentx.core.callbacks import suppress_logger_info\n", "\n", "from evoagentx.models import OpenAILLMConfig, OpenAILLM,AzureOpenAIConfig,LiteLLMConfig,LiteLLM\n", "from evoagentx.workflow import SEWWorkFlowGraph \n", "from evoagentx.agents import AgentManager\n", "from evoagentx.benchmark import MBPPPLUS, AFlowMBPPPLUS\n", "from evoagentx.evaluators import Evaluator \n", "from evoagentx.optimizers import SEWOptimizer \n", "from evoagentx.core.callbacks import suppress_logger_info\n", "from evoagentx.benchmark import HumanEvalPLUS\n", "from evoagentx.benchmark import SciCode\n", "from evoagentx.benchmark import PertQA\n", "from copy import deepcopy\n", "\n", "import nest_asyncio\n", "nest_asyncio.apply()\n", "\n", "class PertQASplits(PertQA):\n", "    \"\"\"PertQA ('adamson') with small, disjoint train/dev subsets sampled from the loaded dev data.\"\"\"\n", "\n", "    def _load_data(self):\n", "        \"\"\"Load the 'adamson' data, then replace train/dev with non-overlapping random samples.\n", "\n", "        The complete, unshuffled list is kept in ``self._fulldata``.\n", "        \"\"\"\n", "        import numpy as np\n", "\n", "        super()._load_data(pertdata = 'adamson')\n", "        full_test_data = self._dev_data\n", "        # A private RandomState(42) yields the same permutation as the former\n", "        # np.random.seed(42) call without reseeding NumPy's global generator.\n", "        permutation = np.random.RandomState(42).permutation(len(full_test_data))\n", "        # First 50 shuffled examples -> train, next 50 -> dev. The slices must not overlap:\n", "        # previously both used permutation[:50], so dev was scored on the training examples.\n", "        self._train_data = [full_test_data[idx] for idx in permutation[:50]]\n", "        self._dev_data = [full_test_data[idx] for idx in permutation[50:100]]\n", "        self._fulldata = full_test_data\n", "\n", "\n", "def collate_func(example: dict) -> dict:\n", "    \"\"\"Build the workflow input: the example's 'question_new' text wrapped in a Question/Answer prompt.\"\"\"\n", "    problem =
\"Question: {}\\n\\nAnswer:\".format(example[\"question_new\"])\n", "    return {\"question\": problem}\n", "\n", "\n", "# Credentials are read from the environment (optionally from a local, untracked .env file).\n", "# The API keys that used to be pasted into this cell are exposed and must be rotated.\n", "load_dotenv()\n", "\n", "# Plain-OpenAI alternative, kept for reference:\n", "# OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n", "# llm_config = OpenAILLMConfig(model=\"gpt-4o-mini-2024-07-18\", openai_key=OPENAI_API_KEY, top_p=0.85, temperature=0.2, frequency_penalty=0.0, presence_penalty=0.0)\n", "# llm = OpenAILLM(config=llm_config)\n", "os.environ[\"AZURE_OPENAI_DEPLOYMENT_NAME\"] = \"gpt-4o-mini\"\n", "os.environ[\"AZURE_OPENAI_ENDPOINT\"] = \"https://tianyuliu-hua-raredisea-resource.cognitiveservices.azure.com/\"\n", "# Fail fast with a clear message instead of sending unauthenticated requests later.\n", "if not os.getenv(\"AZURE_OPENAI_KEY\"):\n", "    raise RuntimeError(\"AZURE_OPENAI_KEY is not set; export it or put it in a local .env file.\")\n", "os.environ[\"AZURE_OPENAI_API_VERSION\"] = \"2025-01-01-preview\"\n", "llm_config = LiteLLMConfig(model=\"azure/\" + os.getenv(\"AZURE_OPENAI_DEPLOYMENT_NAME\"), # Azure model format\n", "    azure_endpoint=os.getenv(\"AZURE_OPENAI_ENDPOINT\"),\n", "    azure_key=os.getenv(\"AZURE_OPENAI_KEY\"),\n", "    api_version=os.getenv(\"AZURE_OPENAI_API_VERSION\", \"2024-12-01-preview\"), top_p=0.85, temperature=0.2, frequency_penalty=0.0, presence_penalty=0.0)\n", "\n", "executor_llm = LiteLLM(config=llm_config)\n", "optimizer_llm = LiteLLM(config=llm_config)\n", "llm = executor_llm" ] }, { "cell_type": "code", "execution_count": 2, "id": "173071a4", "metadata": {}, "outputs": [], "source": [ "# hotpotqa_graph_data = {\n", "# \"goal\": \"Provide a direct answer to the question based on the context, without including explanations or reasoning.\",\n", "# \"tasks\": [\n", "# {\n", "# \"name\": \"answer_generate\",\n", "# \"description\": \"Generate a direct answer to the question based on the
context.\",\n", "# \"inputs\": [\n", "# {\"name\": \"question\", \"type\": \"str\", \"required\": True, \"description\": \"The question to answer directly.\"}\n", "# ],\n", "# \"outputs\": [\n", "# {\"name\": \"answer\", \"type\": \"str\", \"required\": True, \"description\": \"The direct answer to the question.\"}\n", "# ],\n", "# \"prompt_template\": StringTemplate(instruction=\"Think step by step to answer the question. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field. You answer could be only Yes or NO.\\nFormat your output in xml format, such as xxx and xxx.\"),\n", "# \"parse_mode\": \"xml\"\n", "# }\n", "# ] \n", "# }\n", "\n", "#generated_workflow\n", "hotpotqa_graph_data = {\n", " \"goal\": \"Provide a concise answer to the question using relevant context. The answer must be straightforward and avoid unnecessary explanations.\",\n", " \"tasks\": [\n", " {\n", " \"name\": \"generate_answer\",\n", " \"description\": \"Extract and formulate an answer from the given context.\",\n", " \"inputs\": [\n", " {\"name\": \"question\", \"type\": \"str\", \"required\": True, \"description\": \"The question that needs to be answered.\"},\n", " ],\n", " \"outputs\": [\n", " {\"name\": \"answer\", \"type\": \"str\", \"required\": True, \"description\": \"The direct answer to the question.\"}\n", " ],\n", " \"prompt_template\": StringTemplate(instruction=\"Use the context to determine the best answer to the question. 
Provide your final answer in a clear format, without extra commentary or reasoning.\"),\n", " \"parse_mode\": \"xml\"\n", " }\n", " ]\n", "}" ] }, { "cell_type": "code", "execution_count": 3, "id": "83d3f964", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "\u001b[32m2026-01-05 09:39:32.816\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.tools.storage_handler\u001b[0m:\u001b[36m_initialize_storage\u001b[0m:\u001b[36m133\u001b[0m - \u001b[1mLocal storage initialized with base path: .\u001b[0m\n" ] } ], "source": [ "from evoagentx.benchmark import HotPotQA\n", "from evoagentx.tools import ArxivToolkit\n", "import evoagentx.tools\n", "wiki_toolkit = evoagentx.tools.WikipediaSearchToolkit(max_summary_sentences=5)\n", "arxiv_toolkit = evoagentx.tools.ArxivToolkit()\n", "search_toolkit = evoagentx.tools.DDGSSearchToolkit( num_search_pages=5,\n", " max_content_words=300,\n", " backend=\"auto\", # Options: \"auto\", \"duckduckgo\", \"google\", \"bing\", \"brave\", \"yahoo\"\n", " region=\"us-en\" # Language and region settings\n", " )" ] }, { "cell_type": "code", "execution_count": 4, "id": "e92f39be", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "\u001b[32m2026-01-05 09:39:32.827\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.benchmark.pertqa\u001b[0m:\u001b[36m_load_data_from_file\u001b[0m:\u001b[36m52\u001b[0m - \u001b[1mloading HotPotQA data from /home/tl688/pitl688/selfevolve/EvoAgentX/examples/pertqa/reploge_train.json ...\u001b[0m\n", "\u001b[32m2026-01-05 09:39:32.897\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.benchmark.pertqa\u001b[0m:\u001b[36m_load_data_from_file\u001b[0m:\u001b[36m52\u001b[0m - \u001b[1mloading HotPotQA data from /home/tl688/pitl688/selfevolve/EvoAgentX/examples/pertqa/reploge_train.json ...\u001b[0m\n", "\u001b[32m2026-01-05 09:39:32.964\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mevoagentx.benchmark.pertqa\u001b[0m:\u001b[36m_load_data_from_file\u001b[0m:\u001b[36m52\u001b[0m - \u001b[1mloading HotPotQA data from /home/tl688/pitl688/selfevolve/EvoAgentX/examples/pertqa/reploge_test.json ...\u001b[0m\n" ] } ], "source": [ "# llm_config = OpenAILLMConfig(model=\"gpt-4.1-mini-2025-04-14\", openai_key=OPENAI_API_KEY, top_p=0.85, temperature=0.2, frequency_penalty=0.0, presence_penalty=0.0)\n", "# llm = OpenAILLM(config=llm_config)\n", "\n", "# obtain SEW workflow \n", "# sew_graph = SEWWorkFlowGraph.from_dict(hotpotqa_graph_data)\n", "# agent_manager = AgentManager()\n", "# agent_manager.add_agents_from_workflow(sew_graph, executor_llm.config)\n", "# obtain SEW workflow \n", "# sew_graph = QASTRUCTUREWorkFlowGraph.from_dict(hotpotqa_graph_data)\n", "# benchmark = PertQA(pertdata='norman')\n", "benchmark = PertQA(pertdata='reploge')\n", "sew_graph = SequentialWorkFlowGraph.from_dict(hotpotqa_graph_data)\n", "agent_manager = AgentManager(tools=[search_toolkit,wiki_toolkit,arxiv_toolkit])\n", "agent_manager.add_agents_from_workflow(sew_graph, llm_config=llm_config)\n", "evaluator = Evaluator(llm=llm, agent_manager=agent_manager, collate_func=collate_func, num_workers=20, verbose=True)" ] }, { "cell_type": "code", "execution_count": 5, "id": "45761220", "metadata": {}, "outputs": [], "source": [ "from evoagentx.optimizers import QASTRUCTUREOptimizer, TextGradOptimizer" ] }, { "cell_type": "code", "execution_count": 6, "id": "9d046318", "metadata": { "scrolled": true }, "outputs": [], "source": [ "# graph = QASTRUCTUREOptimizer.load_module(\"./debug/save_10_noreason.json\")\n", "# SequentialWorkFlowGraph.from_dict(graph['graph'])" ] }, { "cell_type": "code", "execution_count": 7, "id": "03323462", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "59070" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(benchmark._dev_data)" ] }, { "cell_type": "code", "execution_count": 8, "id": 
"169ad4b0", "metadata": {}, "outputs": [], "source": [ "\n", "benchmark._fulldata = deepcopy(benchmark._train_data)\n", "benchmark._train_data = benchmark._train_data[0:50]\n", "benchmark._dev_data = benchmark._dev_data[0:50]" ] }, { "cell_type": "code", "execution_count": 9, "id": "324c87e5", "metadata": { "scrolled": true }, "outputs": [], "source": [ "evaluator = Evaluator(llm=llm, agent_manager=agent_manager, collate_func=collate_func, num_workers=20, verbose=True)\n", "# obtain SEWOptimizer after having more roles\n", "optimizer = QASTRUCTUREOptimizer(\n", " graph=sew_graph, \n", " evaluator=evaluator, \n", " llm=llm, \n", " max_steps=10,\n", " eval_rounds=1, \n", " repr_scheme=\"python\", \n", " optimize_mode=\"all\", \n", " order=\"zero-order\",\n", " max_rounds=1\n", ")\n", "optimizer.calltime = 1\n", "optimizer.collate_func = collate_func\n", "\n", "benchmark.error_list = {}\n", "benchmark.timeout = 900\n", "benchmark.dataname = 'pubmedxqa'" ] }, { "cell_type": "code", "execution_count": 10, "id": "23d823c3", "metadata": { "scrolled": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "\u001b[32m2026-01-05 09:39:33.217\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1016\u001b[0m - \u001b[1mOptimizing the SequentialWorkFlowGraph workflow with python representation.\u001b[0m\n", "\u001b[32m2026-01-05 09:39:33.217\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1020\u001b[0m - \u001b[1mRun initial evaluation on the original workflow ...\u001b[0m\n", "Evaluating workflow: 2%|▏ | 1/50 [00:01<00:59, 1.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Task exception was never retrieved\n", "future: exception=RuntimeError('Event loop is 
closed')>\n", "Traceback (most recent call last):\n", " File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/asyncio/tasks.py\", line 277, in __step\n", " result = coro.send(None)\n", " ^^^^^^^^^^^^^^^\n", " File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/site-packages/litellm/utils.py\", line 873, in _client_async_logging_helper\n", " GLOBAL_LOGGING_WORKER.ensure_initialized_and_enqueue(\n", " File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/site-packages/litellm/litellm_core_utils/logging_worker.py\", line 322, in ensure_initialized_and_enqueue\n", " self.enqueue(async_coroutine)\n", " File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/site-packages/litellm/litellm_core_utils/logging_worker.py\", line 131, in enqueue\n", " self._queue.put_nowait(task)\n", " File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/asyncio/queues.py\", line 147, in put_nowait\n", " self._wakeup_next(self._getters)\n", " File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/asyncio/queues.py\", line 63, in _wakeup_next\n", " waiter.set_result(None)\n", " File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/asyncio/futures.py\", line 263, in set_result\n", " self.__schedule_callbacks()\n", " File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/asyncio/futures.py\", line 173, in __schedule_callbacks\n", " self._loop.call_soon(callback, self, context=ctx)\n", " File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/asyncio/base_events.py\", line 762, in call_soon\n", " self._check_closed()\n", " File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/asyncio/base_events.py\", line 520, in _check_closed\n", " raise RuntimeError('Event loop is closed')\n", "RuntimeError: Event loop is closed\n", "Evaluating workflow: 4%|▍ | 2/50 [00:01<00:45, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 [00:02<00:41, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:03<00:40, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:05<00:53, 1.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:06<00:56, 1.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:07<00:48, 1.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:08<00:42, 1.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:09<00:36, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:09<00:34, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:10<00:32, 1.21it/s]" ] }, { 
"name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:11<00:30, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:12<00:33, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:13<00:31, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:14<00:31, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:14<00:28, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:15<00:26, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:16<00:26, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:17<00:25, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:18<00:26, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:19<00:24, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:19<00:22, 1.25it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:20<00:21, 1.24it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:21<00:20, 1.28it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:22<00:19, 1.28it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:23<00:21, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:24<00:21, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:25<00:20, 1.07it/s]" ] 
}, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:26<00:19, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:26<00:16, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:27<00:15, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:28<00:13, 1.29it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:29<00:14, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:30<00:13, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:30<00:12, 1.24it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:31<00:11, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 37/50 [00:32<00:10, 1.25it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:33<00:09, 1.31it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:33<00:08, 1.34it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:34<00:07, 1.37it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:35<00:06, 1.36it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:36<00:06, 1.33it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:37<00:05, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:37<00:05, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", 
"Evaluating workflow: 90%|█████████ | 45/50 [00:38<00:04, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:39<00:03, 1.29it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:40<00:02, 1.25it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:40<00:01, 1.29it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:41<00:00, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:42<00:00, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 09:40:15.943\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1024\u001b[0m - \u001b[1mInitial metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.78}\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-05 09:40:17.736\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.006 | Total tokens: 35762 | Current cost: 
$0.001 | Current tokens: 5125\u001b[0m\n", "- The workflow lacks validation steps to ensure the accuracy of predictions before finalizing answers, leading to multiple instances of incorrect solutions.\n", "- There is no mechanism to handle cases where the perturbation involves measuring the expression of the same gene that is being perturbed, which can lead to ambiguous interpretations.\n", "- The workflow does not account for potential contradictions in predictions and solutions, as seen in cases where the predicted answer was 'Yes' but the solution was 'No'.\n", "- The control flow is flawed as it does not differentiate between different types of perturbations or the context of gene expression changes, leading to oversimplified responses.\n", "- The prompt content is underspecified regarding the criteria for determining \"significant change,\" which may lead to inconsistent interpretations across different questions.\n", "\u001b[32m2026-01-05 09:40:19.300\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.007 | Total tokens: 36475 | Current cost: $0.000 | Current tokens: 713\u001b[0m\n", "```python\n", "steps = [\n", " {'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", " {'name': 'validate_answer', 'args': ['answer'], 'outputs': ['validated_answer']},\n", " {'name': 'check_contradictions', 'args': ['validated_answer'], 'outputs': ['final_answer']},\n", " {'name': 'measure_expression', 'args': ['question'], 'outputs': ['expression_data']},\n", " {'name': 'determine_significant_change', 'args': ['expression_data'], 'outputs': ['significant_change']},\n", " {'name': 'finalize_response', 'args': ['final_answer', 'significant_change'], 'outputs': ['final_response']}\n", "]\n", "```\n", "Questions: Question: You are an expert analyst of perturbation datasets. 
For K562 cells subjected to perturbation of POLE, does the expression profile of HNRNPD indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, KARS is perturbed and MASTL expression is quantified. Does this perturbation result in a significant change in MASTL expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, ANKRD17 is perturbed and LRRC23 expression is measured. Determine whether LRRC23 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of TAF13 is associated with a significant change in SNHG3 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, ORC1 is perturbed and the expression of TLCD3A is measured. Does this perturbation cause a significant change in TLCD3A expression? 
Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PSMA6 is perturbed and PLK1 expression is observed. Does this perturbation lead to a significant difference in PLK1 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, ZMAT5 is perturbed and the expression of UQCR10 is measured. Does this perturbation cause a significant change in UQCR10 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of CSTF1, does the expression profile of PCLAF indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb GPN1 and examine the expression of BCS1L. Does perturbing GPN1 lead to a significant change in BCS1L expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. 
For K562 cells subjected to perturbation of GET1, does the expression profile of GFPT1 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SETD1A is perturbed and the expression of PSAT1 is measured. Determine whether PSAT1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, PELP1 is perturbed and CDKN1A expression is measured. Determine whether CDKN1A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb CNN2 and monitor CNN2 expression. Decide whether this perturbation leads to a significant alteration in CNN2 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of CLOCK, does the expression profile of SNHG12 indicate a significant change relative to control conditions? 
Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of GTF3C3, does the expression profile of RPL22L1 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, U2AF2 is perturbed and KIAA1841 expression is measured. Determine whether KIAA1841 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to USP14 and then measure expression of USP14. Does this perturbation cause a significant change in USP14 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to TTC4 and then measure expression of SELENOF. Does this perturbation cause a significant change in SELENOF expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. 
For experiments carried out in K562 cells, we perturb DENR and examine the expression of RIF1. Does perturbing DENR lead to a significant change in RIF1 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, POGLUT3 is perturbed and SFRP1 expression is measured. Determine whether SFRP1 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of PSMD2 is associated with a significant change in CENPW expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, ZNRD1 is perturbed and the expression of ESCO2 is measured. Does this perturbation cause a significant change in ESCO2 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb MAD2L2 and examine the expression of MAD2L2. Does perturbing MAD2L2 lead to a significant change in MAD2L2 expression? 
Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: No\n", "Solutions: Yes\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, TIMM10 is perturbed and the expression of CDKN1A is measured. Does this perturbation cause a significant change in CDKN1A expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to DYNLL1 and then measure expression of SUGCT. Does this perturbation cause a significant change in SUGCT expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb CCDC130 and examine the expression of CLSPN. Does perturbing CCDC130 lead to a significant change in CLSPN expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of HNRNPM is associated with a significant change in HNRNPM expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. 
For experiments carried out in K562 cells, we perturb PSTK and examine the expression of GANAB. Does perturbing PSTK lead to a significant change in GANAB expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which LONP1 is perturbed and NUPR1 expression is observed. Does this perturbation lead to a significant difference in NUPR1 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which DSTYK is perturbed and EMP3 expression is observed. Does this perturbation lead to a significant difference in EMP3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of WDR77 is associated with a significant change in FLII expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of BCLAF1 is associated with a significant change in EDN1 expression compared with unperturbed controls. 
Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb HNRNPM and monitor DPH7 expression. Decide whether this perturbation leads to a significant alteration in DPH7 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb MRPL36 and examine the expression of LAMP1. Does perturbing MRPL36 lead to a significant change in LAMP1 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, NDUFB6 is perturbed and the expression of GALK1 is measured. Determine whether GALK1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to WBP11 and then measure expression of SELENOM. Does this perturbation cause a significant change in SELENOM expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. 
For K562 cells subjected to perturbation of RAC1, does the expression profile of RAC1 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, EGLN2 is perturbed and the expression of LDHA is measured. Determine whether LDHA shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SNRPG is perturbed and the expression of LRRCC1 is measured. Determine whether LRRCC1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: No\n", "Solutions: Yes\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ARMC6 is associated with a significant change in FBN2 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: No\n", "Solutions: Yes\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, TRAPPC8 is perturbed and the expression of AP1M2 is measured. Does this perturbation cause a significant change in AP1M2 expression? 
Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, TAZ is perturbed and XRN1 expression is measured. Determine whether XRN1 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, HSP90B1 is perturbed and HSP90B1 expression is measured. Determine whether HSP90B1 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SMARCC1 and then measure expression of MZT2B. Does this perturbation cause a significant change in MZT2B expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DNAJA3 is perturbed and MT-CO3 expression is quantified. Does this perturbation result in a significant change in MT-CO3 expression compared with control cells? 
Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb CLNS1A and examine the expression of CLNS1A. Does perturbing CLNS1A lead to a significant change in CLNS1A expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb SRP9 and monitor GAS5 expression. Decide whether this perturbation leads to a significant alteration in GAS5 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PTK2 is perturbed and BRCA2 expression is observed. Does this perturbation lead to a significant difference in BRCA2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of DDX19B, does the expression profile of ASPH indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a specialist in gene perturbation experiments. 
In K562 cells, we introduce a perturbation to RPLP1 and then measure expression of FANCG. Does this perturbation cause a significant change in FANCG expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': None, 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None, 'tools': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-05 09:40:20.848\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.008 | Total tokens: 41710 | Current cost: $0.001 | Current tokens: 5235\u001b[0m\n", "\u001b[32m2026-01-05 09:40:21.702\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.008 | Total tokens: 41816 | Current cost: $0.000 | Current tokens: 106\u001b[0m\n", "\u001b[32m2026-01-05 09:40:22.401\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.008 | Total tokens: 42381 | Current cost: $0.000 
| Current tokens: 565\u001b[0m\n", "{'name': 'validate_answer5341', 'description': 'Task to validate_answer5341. Takes answer as input. Produces validated_answer as output.', 'inputs': [{'name': 'answer', 'type': 'str', 'description': 'Input parameter answer for validate_answer5341', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_answer5341', 'required': True}], 'prompt': 'Your are a task solver.', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None, 'tools': None}\n", "\u001b[32m2026-01-05 09:40:24.032\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.009 | Total tokens: 47665 | Current cost: $0.001 | Current tokens: 5284\u001b[0m\n", "\u001b[32m2026-01-05 09:40:24.591\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.009 | Total tokens: 47762 | Current cost: $0.000 | Current tokens: 97\u001b[0m\n", "\u001b[32m2026-01-05 09:40:25.745\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.009 | Total tokens: 48428 | Current cost: $0.000 | Current tokens: 666\u001b[0m\n", "{'name': 'check_contradictions3523', 'description': 'Task to check_contradictions3523. 
Takes validated_answer as input. Produces final_answer as output.', 'inputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Input parameter validated_answer for check_contradictions3523', 'required': False}], 'outputs': [{'name': 'final_answer', 'type': 'str', 'description': 'Output parameter final_answer from check_contradictions3523', 'required': True}], 'prompt': 'Your are a task solver.', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None, 'tools': None}\n", "\u001b[32m2026-01-05 09:40:27.293\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.010 | Total tokens: 53717 | Current cost: $0.001 | Current tokens: 5289\u001b[0m\n", "\u001b[32m2026-01-05 09:40:27.778\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.010 | Total tokens: 53814 | Current cost: $0.000 | Current tokens: 97\u001b[0m\n", "\u001b[32m2026-01-05 09:40:29.839\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.010 | Total tokens: 54511 | Current cost: $0.000 | Current tokens: 697\u001b[0m\n", "{'name': 'measure_expression3983', 'description': 'Task to measure_expression3983. Takes question as input. 
Produces expression_data as output.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'Input parameter question for measure_expression3983', 'required': False}], 'outputs': [{'name': 'expression_data', 'type': 'str', 'description': 'Output parameter expression_data from measure_expression3983', 'required': True}], 'prompt': 'Your are a task solver.', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None, 'tools': None}\n", "\u001b[32m2026-01-05 09:40:31.823\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.011 | Total tokens: 59804 | Current cost: $0.001 | Current tokens: 5293\u001b[0m\n", "\u001b[32m2026-01-05 09:40:32.345\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.011 | Total tokens: 59899 | Current cost: $0.000 | Current tokens: 95\u001b[0m\n", "\u001b[32m2026-01-05 09:40:33.298\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.011 | Total tokens: 60555 | Current cost: $0.000 | Current tokens: 656\u001b[0m\n", "{'name': 'determine_significant_change2829', 'description': 'Task to determine_significant_change2829. Takes expression_data as input. 
Produces significant_change as output.', 'inputs': [{'name': 'expression_data', 'type': 'str', 'description': 'Input parameter expression_data for determine_significant_change2829', 'required': False}], 'outputs': [{'name': 'significant_change', 'type': 'str', 'description': 'Output parameter significant_change from determine_significant_change2829', 'required': True}], 'prompt': 'Your are a task solver.', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None, 'tools': None}\n", "\u001b[32m2026-01-05 09:40:35.310\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.012 | Total tokens: 65848 | Current cost: $0.001 | Current tokens: 5293\u001b[0m\n", "\u001b[32m2026-01-05 09:40:35.863\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.012 | Total tokens: 65951 | Current cost: $0.000 | Current tokens: 103\u001b[0m\n", "\u001b[32m2026-01-05 09:40:37.362\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.012 | Total tokens: 66611 | Current cost: $0.000 | Current tokens: 660\u001b[0m\n", "{'name': 'finalize_response1607', 'description': 'Task to finalize_response1607. Takes final_answer, significant_change as input. 
Produces final_response as output.', 'inputs': [{'name': 'final_answer', 'type': 'str', 'description': 'Input parameter final_answer for finalize_response1607', 'required': False}, {'name': 'significant_change', 'type': 'str', 'description': 'Input parameter significant_change for finalize_response1607', 'required': False}], 'outputs': [{'name': 'final_response', 'type': 'str', 'description': 'Output parameter final_response from finalize_response1607', 'required': True}], 'prompt': 'Your are a task solver.', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None, 'tools': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-05 09:40:38.974\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.013 | Total tokens: 71914 | Current cost: $0.001 | Current tokens: 5303\u001b[0m\n", "\u001b[32m2026-01-05 09:40:39.668\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.013 | Total tokens: 72010 | Current cost: $0.000 | Current tokens: 96\u001b[0m\n", "\u001b[32m2026-01-05 09:40:40.831\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.013 | Total tokens: 72701 | Current cost: $0.000 | Current tokens: 691\u001b[0m\n", "\u001b[32m2026-01-05 
09:40:40.832\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1054\u001b[0m - \u001b[1mEvaluate the workflow at step 1 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:01<00:54, 1.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:01<00:42, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 [00:02<00:38, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:03<00:38, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:04<00:37, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:04<00:32, 1.34it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:05<00:31, 1.35it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:06<00:31, 1.35it/s]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:07<00:30, 1.33it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:08<00:32, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:08<00:31, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:09<00:30, 1.24it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:10<00:27, 1.33it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:10<00:26, 1.37it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:11<00:25, 1.35it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:12<00:25, 1.33it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:13<00:27, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:14<00:25, 1.26it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:14<00:23, 1.34it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:15<00:22, 1.33it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:16<00:23, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:17<00:22, 1.25it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:18<00:24, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:19<00:23, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:20<00:20, 1.21it/s]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:20<00:18, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:21<00:18, 1.26it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:22<00:18, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:23<00:17, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:24<00:17, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:25<00:16, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:25<00:14, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:26<00:14, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:27<00:14, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:28<00:13, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:29<00:12, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 37/50 [00:30<00:11, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:31<00:11, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:32<00:09, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:33<00:09, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:34<00:08, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 
[00:34<00:06, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:35<00:05, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:36<00:05, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:37<00:04, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:38<00:03, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:39<00:02, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:39<00:01, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:40<00:00, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:42<00:00, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 09:41:23.379\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1057\u001b[0m - \u001b[1mStep 1 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.78}\u001b[0m\n", "randomly update dataset\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-05 09:41:25.336\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.020 | Total tokens: 108455 | Current cost: $0.001 | Current tokens: 5117\u001b[0m\n", "- The workflow lacks validation steps to ensure the accuracy of predictions before finalizing the answers, leading to multiple instances of incorrect solutions.\n", "- There is an absence of error handling for cases where the predictions and solutions do not align, resulting in misleading outputs.\n", "- The workflow does not account for the potential need for additional context or data analysis steps that may be necessary for accurate interpretation of the questions.\n", "- The structure assumes that all questions can be answered with a simple 'Yes' or 'No' without considering the complexity of gene expression changes, which may not always be straightforward.\n", "- The workflow does not specify how to handle cases where the expression of the perturbed gene is measured, which could lead to ambiguous interpretations.\n", "\u001b[32m2026-01-05 09:41:26.734\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.020 | Total tokens: 109125 | Current cost: $0.000 | Current tokens: 670\u001b[0m\n", "```python\n", "steps = [\n", " {'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", " {'name': 
'validate_answer', 'args': ['answer'], 'outputs': ['validated_answer']},\n", " {'name': 'handle_errors', 'args': ['validated_answer'], 'outputs': ['final_answer']},\n", " {'name': 'context_analysis', 'args': ['question'], 'outputs': ['context_data']},\n", " {'name': 'measure_expression', 'args': ['context_data'], 'outputs': ['expression_data']}\n", "]\n", "```\n", "Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of POLE, does the expression profile of HNRNPD indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, KARS is perturbed and MASTL expression is quantified. Does this perturbation result in a significant change in MASTL expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, ANKRD17 is perturbed and LRRC23 expression is measured. Determine whether LRRC23 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of TAF13 is associated with a significant change in SNHG3 expression compared with unperturbed controls. 
Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, ORC1 is perturbed and the expression of TLCD3A is measured. Does this perturbation cause a significant change in TLCD3A expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PSMA6 is perturbed and PLK1 expression is observed. Does this perturbation lead to a significant difference in PLK1 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, ZMAT5 is perturbed and the expression of UQCR10 is measured. Does this perturbation cause a significant change in UQCR10 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of CSTF1, does the expression profile of PCLAF indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. 
For experiments carried out in K562 cells, we perturb GPN1 and examine the expression of BCS1L. Does perturbing GPN1 lead to a significant change in BCS1L expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of GET1, does the expression profile of GFPT1 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SETD1A is perturbed and the expression of PSAT1 is measured. Determine whether PSAT1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, PELP1 is perturbed and CDKN1A expression is measured. Determine whether CDKN1A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb CNN2 and monitor CNN2 expression. Decide whether this perturbation leads to a significant alteration in CNN2 expression. 
Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of CLOCK, does the expression profile of SNHG12 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of GTF3C3, does the expression profile of RPL22L1 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, U2AF2 is perturbed and KIAA1841 expression is measured. Determine whether KIAA1841 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to USP14 and then measure expression of USP14. Does this perturbation cause a significant change in USP14 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. 
In K562 cells, we introduce a perturbation to TTC4 and then measure expression of SELENOF. Does this perturbation cause a significant change in SELENOF expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DENR and examine the expression of RIF1. Does perturbing DENR lead to a significant change in RIF1 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, POGLUT3 is perturbed and SFRP1 expression is measured. Determine whether SFRP1 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of PSMD2 is associated with a significant change in CENPW expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, ZNRD1 is perturbed and the expression of ESCO2 is measured. Does this perturbation cause a significant change in ESCO2 expression? 
Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb MAD2L2 and examine the expression of MAD2L2. Does perturbing MAD2L2 lead to a significant change in MAD2L2 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: No\n", "Solutions: Yes\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, TIMM10 is perturbed and the expression of CDKN1A is measured. Does this perturbation cause a significant change in CDKN1A expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to DYNLL1 and then measure expression of SUGCT. Does this perturbation cause a significant change in SUGCT expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb CCDC130 and examine the expression of CLSPN. Does perturbing CCDC130 lead to a significant change in CLSPN expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. 
For K562 cells, assess whether perturbation of HNRNPM is associated with a significant change in HNRNPM expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb PSTK and examine the expression of GANAB. Does perturbing PSTK lead to a significant change in GANAB expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which LONP1 is perturbed and NUPR1 expression is observed. Does this perturbation lead to a significant difference in NUPR1 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which DSTYK is perturbed and EMP3 expression is observed. Does this perturbation lead to a significant difference in EMP3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of WDR77 is associated with a significant change in FLII expression compared with unperturbed controls. 
Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of BCLAF1 is associated with a significant change in EDN1 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb HNRNPM and monitor DPH7 expression. Decide whether this perturbation leads to a significant alteration in DPH7 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb MRPL36 and examine the expression of LAMP1. Does perturbing MRPL36 lead to a significant change in LAMP1 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, NDUFB6 is perturbed and the expression of GALK1 is measured. Determine whether GALK1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. 
In K562 cells, we introduce a perturbation to WBP11 and then measure expression of SELENOM. Does this perturbation cause a significant change in SELENOM expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of RAC1, does the expression profile of RAC1 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, EGLN2 is perturbed and the expression of LDHA is measured. Determine whether LDHA shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SNRPG is perturbed and the expression of LRRCC1 is measured. Determine whether LRRCC1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ARMC6 is associated with a significant change in FBN2 expression compared with unperturbed controls. 
Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: No\n", "Solutions: Yes\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, TRAPPC8 is perturbed and the expression of AP1M2 is measured. Does this perturbation cause a significant change in AP1M2 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, TAZ is perturbed and XRN1 expression is measured. Determine whether XRN1 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, HSP90B1 is perturbed and HSP90B1 expression is measured. Determine whether HSP90B1 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SMARCC1 and then measure expression of MZT2B. Does this perturbation cause a significant change in MZT2B expression? 
Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DNAJA3 is perturbed and MT-CO3 expression is quantified. Does this perturbation result in a significant change in MT-CO3 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb CLNS1A and examine the expression of CLNS1A. Does perturbing CLNS1A lead to a significant change in CLNS1A expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: No\n", "Solutions: Yes\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb SRP9 and monitor GAS5 expression. Decide whether this perturbation leads to a significant alteration in GAS5 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PTK2 is perturbed and BRCA2 expression is observed. Does this perturbation lead to a significant difference in BRCA2 expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of DDX19B, does the expression profile of ASPH indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to RPLP1 and then measure expression of FANCG. Does this perturbation cause a significant change in FANCG expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': None, 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. 
Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None, 'tools': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-05 09:41:29.384\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.021 | Total tokens: 114342 | Current cost: $0.001 | Current tokens: 5217\u001b[0m\n", "\u001b[32m2026-01-05 09:41:30.361\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.021 | Total tokens: 114439 | Current cost: $0.000 | Current tokens: 97\u001b[0m\n", "\u001b[32m2026-01-05 09:41:31.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.021 | Total tokens: 115039 | Current cost: $0.000 | Current tokens: 600\u001b[0m\n", "{'name': 'validate_answer4001', 'description': 'Task to validate_answer4001. Takes answer as input. Produces validated_answer as output.', 'inputs': [{'name': 'answer', 'type': 'str', 'description': 'Input parameter answer for validate_answer4001', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_answer4001', 'required': True}], 'prompt': 'Your are a task solver.', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None, 'tools': None}\n", "\u001b[32m2026-01-05 09:41:32.646\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.022 | Total tokens: 120275 | Current cost: $0.001 | Current tokens: 5236\u001b[0m\n", "\u001b[32m2026-01-05 09:41:33.151\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.022 | Total tokens: 120366 | Current cost: $0.000 | Current tokens: 91\u001b[0m\n", "\u001b[32m2026-01-05 09:41:34.341\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.022 | Total tokens: 120973 | Current cost: $0.000 | Current tokens: 607\u001b[0m\n", "{'name': 'handle_errors3745', 'description': 'Task to handle_errors3745. Takes validated_answer as input. Produces final_answer as output.', 'inputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Input parameter validated_answer for handle_errors3745', 'required': False}], 'outputs': [{'name': 'final_answer', 'type': 'str', 'description': 'Output parameter final_answer from handle_errors3745', 'required': True}], 'prompt': 'Your are a task solver.', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None, 'tools': None}\n", "\u001b[32m2026-01-05 09:41:36.022\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.023 | Total tokens: 126232 | Current cost: $0.001 | Current tokens: 5259\u001b[0m\n", "\u001b[32m2026-01-05 09:41:36.846\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.023 | Total tokens: 126342 | Current cost: $0.000 | Current tokens: 110\u001b[0m\n", "\u001b[32m2026-01-05 09:41:38.111\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.023 | Total tokens: 126998 | Current cost: $0.000 | Current tokens: 656\u001b[0m\n", "{'name': 'context_analysis2082', 'description': 'Task to context_analysis2082. Takes question as input. Produces context_data as output.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'Input parameter question for context_analysis2082', 'required': False}], 'outputs': [{'name': 'context_data', 'type': 'str', 'description': 'Output parameter context_data from context_analysis2082', 'required': True}], 'prompt': 'Your are a task solver.', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None, 'tools': None}\n", "\u001b[32m2026-01-05 09:41:40.453\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.024 | Total tokens: 132262 | Current cost: $0.001 | Current tokens: 5264\u001b[0m\n", "\u001b[32m2026-01-05 09:41:41.257\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.024 | Total tokens: 132380 | Current cost: $0.000 | Current tokens: 118\u001b[0m\n", "\u001b[32m2026-01-05 09:41:42.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.024 | Total tokens: 133046 | Current cost: $0.000 | Current tokens: 666\u001b[0m\n", "{'name': 'measure_expression3685', 'description': 'Task to measure_expression3685. Takes context_data as input. Produces expression_data as output.', 'inputs': [{'name': 'context_data', 'type': 'str', 'description': 'Input parameter context_data for measure_expression3685', 'required': False}], 'outputs': [{'name': 'expression_data', 'type': 'str', 'description': 'Output parameter expression_data from measure_expression3685', 'required': True}], 'prompt': 'Your are a task solver.', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None, 'tools': None}\n", "\u001b[32m2026-01-05 09:41:44.359\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.025 | Total tokens: 138296 | Current cost: $0.001 | Current tokens: 5250\u001b[0m\n", "\u001b[32m2026-01-05 09:41:44.985\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.025 | Total tokens: 138401 | Current cost: $0.000 | Current tokens: 105\u001b[0m\n", "\u001b[32m2026-01-05 09:41:46.272\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.026 | Total tokens: 139038 | Current cost: $0.000 | Current tokens: 637\u001b[0m\n", "\u001b[32m2026-01-05 09:41:46.273\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1054\u001b[0m - \u001b[1mEvaluate the workflow at step 2 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:00<00:47, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:01<00:43, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 
3/50 [00:02<00:40, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:03<00:42, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:04<00:37, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:05<00:43, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:06<00:44, 1.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:07<00:41, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:08<00:36, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:09<00:33, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:09<00:32, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:10<00:29, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:11<00:28, 1.28it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:12<00:37, 1.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:13<00:33, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:14<00:30, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:15<00:31, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:16<00:30, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:17<00:27, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:18<00:27, 1.10it/s]" 
] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:19<00:26, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:19<00:24, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:20<00:22, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:21<00:21, 1.24it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:22<00:19, 1.26it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:22<00:17, 1.33it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:23<00:16, 1.36it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:24<00:16, 1.31it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:25<00:16, 1.30it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:26<00:16, 1.25it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:26<00:14, 1.30it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:27<00:13, 1.33it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:28<00:12, 1.31it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:29<00:12, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:29<00:11, 1.30it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:30<00:11, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
74%|███████▍ | 37/50 [00:31<00:10, 1.24it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:32<00:09, 1.24it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:32<00:08, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:34<00:09, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:35<00:09, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:36<00:08, 1.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:37<00:06, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:37<00:05, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:38<00:04, 1.20it/s]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:39<00:03, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:40<00:02, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:41<00:01, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:42<00:00, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:42<00:00, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:42:29.099\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1057\u001b[0m - \u001b[1mStep 2 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.78}\u001b[0m\n", "randomly update dataset\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-05 09:42:31.105\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.032 | Total tokens: 174790 | Current cost: $0.001 | Current tokens: 5121\u001b[0m\n", "- The workflow lacks 
validation steps to confirm the accuracy of predictions before generating the final answer, leading to multiple instances of incorrect solutions.\n", "- There is an assumption that all questions can be answered with a simple 'Yes' or 'No' without considering the complexity of gene expression changes, which may not always be valid.\n", "- The workflow does not account for the context or specific details of each perturbation, which could lead to misleading predictions.\n", "- The execution history shows multiple cases where the predicted answers were incorrect, indicating a failure in the underlying computational model or logic used to derive answers.\n", "- There is no mechanism to handle or flag cases where the same gene is perturbed and measured, which could lead to circular reasoning or incorrect conclusions.\n", "\u001b[32m2026-01-05 09:42:32.118\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.032 | Total tokens: 175397 | Current cost: $0.000 | Current tokens: 607\u001b[0m\n", "```python\n", "steps = [\n", " {'name': 'validate_predictions', 'args': ['question'], 'outputs': ['validated_answer']},\n", " {'name': 'generate_answer', 'args': ['validated_answer'], 'outputs': ['answer']}\n", "]\n", "```\n", "\u001b[32m2026-01-05 09:42:32.119\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['validate_predictions7073', 'generate_answer']\u001b[0m\n", "Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of UBE2D3, does the expression profile of LRFN4 indicate a significant change relative to control conditions? 
Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of TKT is associated with a significant change in PRH1 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of DNMT1, does the expression profile of KRT81 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to HUWE1 and then measure expression of RPL22L1. Does this perturbation cause a significant change in RPL22L1 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to MTPAP and then measure expression of MT-CO2. Does this perturbation cause a significant change in MT-CO2 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. 
In K562 cells, we introduce a perturbation to WDR36 and then measure expression of KNL1. Does this perturbation cause a significant change in KNL1 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, MOB4 is perturbed and MOB4 expression is measured. Determine whether MOB4 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to MAD2L2 and then measure expression of DSG2. Does this perturbation cause a significant change in DSG2 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb TYK2 and monitor TAF7 expression. Decide whether this perturbation leads to a significant alteration in TAF7 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, NUBP1 is perturbed and the expression of SAAL1 is measured. Does this perturbation cause a significant change in SAAL1 expression? 
Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of KIF18A is associated with a significant change in DONSON expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb USP5 and examine the expression of MAP1A. Does perturbing USP5 lead to a significant change in MAP1A expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of MRPL49 is associated with a significant change in C1QL1 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, MRPL55 is perturbed and the expression of MT-ND3 is measured. Determine whether MT-ND3 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a genomics expert evaluating perturbation experiments. 
In K562 cells, CDC37 is perturbed and the expression of SSX2IP is measured. Determine whether SSX2IP shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb PSMC4 and examine the expression of SAT1. Does perturbing PSMC4 lead to a significant change in SAT1 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb ISCU and examine the expression of SCARA3. Does perturbing ISCU lead to a significant change in SCARA3 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb EXOSC9 and monitor CCNA2 expression. Decide whether this perturbation leads to a significant alteration in CCNA2 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, PHB2 is perturbed and the expression of NFRKB is measured. Determine whether NFRKB shows a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, USP9X is perturbed and USP9X expression is measured. Determine whether USP9X exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of GABPA, does the expression profile of CCNB1 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, RNF103 is perturbed and the expression of AZI2 is measured. Determine whether AZI2 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to ATP5PO and then measure expression of ATP5PO. Does this perturbation cause a significant change in ATP5PO expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. 
For experiments performed in K562 cells, MRPS9 is perturbed and MT-ND2 expression is measured. Determine whether MT-ND2 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, PDCD6 is perturbed and KCNN4 expression is measured. Determine whether KCNN4 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, NBAS is perturbed and the expression of SCD is measured. Does this perturbation cause a significant change in SCD expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, CCND3 is perturbed and the expression of CCND3 is measured. Does this perturbation cause a significant change in CCND3 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which CHTF18 is perturbed and ZIM3 expression is observed. Does this perturbation lead to a significant difference in ZIM3 expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, KLC2 is perturbed and SNHG11 expression is measured. Determine whether SNHG11 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, MYBL2 is perturbed and the expression of ZNRF1 is measured. Determine whether ZNRF1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb RBBP5 and examine the expression of ANXA10. Does perturbing RBBP5 lead to a significant change in ANXA10 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of ATP5PO, does the expression profile of EMP3 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. 
For K562 cells, assess whether perturbation of TMEM214 is associated with a significant change in TMEM214 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which USP8 is perturbed and PUDP expression is observed. Does this perturbation lead to a significant difference in PUDP expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb PPP2CB and examine the expression of PPP2CB. Does perturbing PPP2CB lead to a significant change in PPP2CB expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: No\n", "Solutions: Yes\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb EXOSC5 and monitor MIRLET7BHG expression. Decide whether this perturbation leads to a significant alteration in MIRLET7BHG expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which CCNH is perturbed and ZMAT3 expression is observed. Does this perturbation lead to a significant difference in ZMAT3 expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of PSMC3, does the expression profile of MYO19 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, PIAS4 is perturbed and the expression of DYNC1I2 is measured. Determine whether DYNC1I2 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, MFN2 is perturbed and the expression of HMOX1 is measured. Does this perturbation cause a significant change in HMOX1 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, ITGB1BP1 is perturbed and the expression of ITGB1BP1 is measured. Determine whether ITGB1BP1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. 
In K562 cells, we introduce a perturbation to BRD2 and then measure expression of DHODH. Does this perturbation cause a significant change in DHODH expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of RPL32, does the expression profile of SUGCT indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, COG2 is perturbed and the expression of ARPIN is measured. Determine whether ARPIN shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb GNL3L and monitor ZWILCH expression. Decide whether this perturbation leads to a significant alteration in ZWILCH expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which POLR3H is perturbed and CDC25A expression is observed. Does this perturbation lead to a significant difference in CDC25A expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, UHRF1 is perturbed and the expression of UHRF1 is measured. Does this perturbation cause a significant change in UHRF1 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of RPS15A, does the expression profile of FANCI indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of AURKAIP1 is associated with a significant change in AURKAIP1 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SPATA5 is perturbed and the expression of CDKN1A is measured. Determine whether CDKN1A shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'validate_predictions7073', 'description': 'Task to validate_predictions7073. Takes question as input. 
Produces validated_answer as output.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'Input parameter question for validate_predictions7073', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_predictions7073', 'required': True}], 'prompt': 'Your are a task solver.', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None, 'tools': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-05 09:42:33.807\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.033 | Total tokens: 180557 | Current cost: $0.001 | Current tokens: 5160\u001b[0m\n", "\u001b[32m2026-01-05 09:42:34.531\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.033 | Total tokens: 180653 | Current cost: $0.000 | Current tokens: 96\u001b[0m\n", "\u001b[32m2026-01-05 09:42:36.110\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.033 | Total tokens: 181221 | Current cost: $0.000 | Current tokens: 568\u001b[0m\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The 
question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': None, 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None, 'tools': None}\n", "\u001b[32m2026-01-05 09:42:38.917\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.034 | Total tokens: 186381 | Current cost: $0.001 | Current tokens: 5160\u001b[0m\n", "\u001b[32m2026-01-05 09:42:39.512\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.034 | Total tokens: 186485 | Current cost: $0.000 | Current tokens: 104\u001b[0m\n", "\u001b[32m2026-01-05 09:42:40.504\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.034 | Total tokens: 187108 | Current cost: $0.000 | Current tokens: 623\u001b[0m\n", "\u001b[32m2026-01-05 09:42:40.506\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['validate_predictions7073', 'generate_answer']\u001b[0m\n", "\u001b[32m2026-01-05 09:42:40.507\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1054\u001b[0m - \u001b[1mEvaluate the workflow at step 3 
...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:00<00:37, 1.32it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:01<00:38, 1.24it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 [00:02<00:37, 1.24it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:03<00:36, 1.26it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:04<00:36, 1.24it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:04<00:34, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:05<00:33, 1.28it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:06<00:31, 1.34it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:07<00:34, 1.18it/s]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:08<00:33, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:08<00:31, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:10<00:35, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:10<00:33, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:11<00:31, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:12<00:30, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:13<00:31, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:14<00:29, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:15<00:29, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:16<00:31, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:17<00:29, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:18<00:27, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:19<00:24, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:19<00:22, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:20<00:21, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:21<00:19, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:22<00:19, 1.26it/s]" ] }, { 
"name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:22<00:17, 1.31it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:23<00:17, 1.25it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:24<00:16, 1.26it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:25<00:15, 1.29it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:26<00:14, 1.31it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:26<00:14, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:28<00:16, 1.00it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:29<00:14, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:29<00:12, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:30<00:11, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 37/50 [00:31<00:11, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:32<00:10, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:33<00:09, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:34<00:09, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:35<00:08, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:36<00:07, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
86%|████████▌ | 43/50 [00:37<00:06, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:37<00:05, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:38<00:04, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:39<00:03, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:40<00:02, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:41<00:01, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:42<00:00, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:43<00:00, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:43:24.297\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1057\u001b[0m - \u001b[1mStep 3 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.84}\u001b[0m\n", "randomly update dataset\n", "\u001b[32m2026-01-05 09:43:24.299\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['validate_predictions7073', 'generate_answer']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-05 09:43:27.054\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.041 | Total tokens: 222948 | Current cost: $0.001 | Current tokens: 5154\u001b[0m\n", "- The workflow lacks a validation step after generating the answer, which could help ensure the accuracy of the predictions before finalizing the solution.\n", "- There is a structural flaw in the workflow as it does not account for the possibility of conflicting predictions and solutions, leading to multiple instances of incorrect computation results.\n", "- The workflow does not include any error handling mechanisms for cases where the predictions and solutions do not align, resulting in unaddressed discrepancies.\n", "- The prompts in the questions are overly similar and do not provide sufficient context or differentiation, which may lead to ambiguity in understanding the specific perturbations and their expected outcomes.\n", "- The execution history shows multiple instances of incorrect solutions without any indication of how these errors were addressed or analyzed, indicating a lack of thorough review or feedback mechanisms in the workflow.\n", "\u001b[32m2026-01-05 09:43:27.729\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.041 | Total tokens: 223587 | Current cost: $0.000 | Current tokens: 639\u001b[0m\n", "```python\n", "steps = [\n", "{'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", "{'name': 'validate_predictions7073', 'args': ['answer'], 'outputs': ['validated_answer']}\n", "]\n", "```\n", "\u001b[32m2026-01-05 09:43:27.731\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n", "Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, ATP6V1E1 is perturbed and the expression of ATP6V1E1 is measured. Determine whether ATP6V1E1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb PSMD3 and monitor PRIM1 expression. Decide whether this perturbation leads to a significant alteration in PRIM1 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, PPP1R10 is perturbed and MOSPD3 expression is quantified. Does this perturbation result in a significant change in MOSPD3 expression compared with control cells? 
Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, RUVBL1 is perturbed and the expression of EPB41L4A-AS1 is measured. Determine whether EPB41L4A-AS1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DYNC1I2 and examine the expression of DEDD. Does perturbing DYNC1I2 lead to a significant change in DEDD expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb MCMBP and monitor H2AFX expression. Decide whether this perturbation leads to a significant alteration in H2AFX expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, MRPL10 is perturbed and MT-ND3 expression is measured. Determine whether MT-ND3 exhibits a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to CCNL1 and then measure expression of POLR2J3. Does this perturbation cause a significant change in POLR2J3 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of NOP14 is associated with a significant change in KIFC1 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, MRPL17 is perturbed and the expression of MRPL17 is measured. Determine whether MRPL17 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which BOP1 is perturbed and TPX2 expression is observed. Does this perturbation lead to a significant difference in TPX2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a domain expert in functional genomics. 
For experiments carried out in K562 cells, we perturb BCLAF1 and examine the expression of BCLAF1. Does perturbing BCLAF1 lead to a significant change in BCLAF1 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: No\n", "Solutions: Yes\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb CMTR1 and examine the expression of SNHG29. Does perturbing CMTR1 lead to a significant change in SNHG29 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb GFER and monitor GFER expression. Decide whether this perturbation leads to a significant alteration in GFER expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, SRSF2 is perturbed and the expression of ENPP1 is measured. Does this perturbation cause a significant change in ENPP1 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, PELP1 is perturbed and the expression of CDKN1A is measured. Does this perturbation cause a significant change in CDKN1A expression? 
Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of SF3B2 is associated with a significant change in CCHCR1 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SETX, does the expression profile of TXNIP indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of POLR1E, does the expression profile of CCNE2 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which NOP2 is perturbed and TK1 expression is observed. Does this perturbation lead to a significant difference in TK1 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which SF1 is perturbed and RASA1 expression is observed. 
Does this perturbation lead to a significant difference in RASA1 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PRORP is perturbed and MT-CYB expression is observed. Does this perturbation lead to a significant difference in MT-CYB expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of EIF5A, does the expression profile of MNS1 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, CCT5 is perturbed and the expression of ARHGAP11A is measured. Does this perturbation cause a significant change in ARHGAP11A expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to MRPL11 and then measure expression of MRPL11. Does this perturbation cause a significant change in MRPL11 expression? 
Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, METTL17 is perturbed and RPL22 expression is measured. Determine whether RPL22 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which GRSF1 is perturbed and PLOD1 expression is observed. Does this perturbation lead to a significant difference in PLOD1 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb C5orf30 and examine the expression of FANCI. Does perturbing C5orf30 lead to a significant change in FANCI expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which COX11 is perturbed and IGFBP3 expression is observed. Does this perturbation lead to a significant difference in IGFBP3 expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to MRPS31 and then measure expression of MT-CO1. Does this perturbation cause a significant change in MT-CO1 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, PPP1R15B is perturbed and SRP72 expression is quantified. Does this perturbation result in a significant change in SRP72 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TERF2 is perturbed and SAPCD2 expression is observed. Does this perturbation lead to a significant difference in SAPCD2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of RAB18 is associated with a significant change in NAGPA expression compared with unperturbed controls. 
Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of RPL10 is associated with a significant change in PCLAF expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, GOLT1B is perturbed and YIF1A expression is quantified. Does this perturbation result in a significant change in YIF1A expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb SPCS3 and monitor SLC3A2 expression. Decide whether this perturbation leads to a significant alteration in SLC3A2 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PREB is perturbed and CDCA2 expression is observed. Does this perturbation lead to a significant difference in CDCA2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. 
For K562 cells subjected to perturbation of LSM2, does the expression profile of VPS26B indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, PRPF39 is perturbed and the expression of TTC37 is measured. Determine whether TTC37 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to CCT4 and then measure expression of CCT4. Does this perturbation cause a significant change in CCT4 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TMED10 is perturbed and the expression of YIF1A is measured. Determine whether YIF1A shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, CDC16 is perturbed and the expression of CDC16 is measured. Determine whether CDC16 shows a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, PAM16 is perturbed and DDIT4 expression is measured. Determine whether DDIT4 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ZMYM4 is associated with a significant change in IFNGR1 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TAF6 is perturbed and the expression of PLEKHH3 is measured. Determine whether PLEKHH3 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: No\n", "Solutions: No\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb PELP1 and monitor CDKN1A expression. Decide whether this perturbation leads to a significant alteration in CDKN1A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. 
Consider data from K562 cells in which GINS1 is perturbed and EID1 expression is observed. Does this perturbation lead to a significant difference in EID1 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of EIF4A1 is associated with a significant change in AXL expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb OGFOD1 and examine the expression of TOE1. Does perturbing OGFOD1 lead to a significant change in TOE1 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to NUP88 and then measure expression of CEP55. Does this perturbation cause a significant change in CEP55 expression? 
Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': '\"\"\"\\nUse the context provided in `{question}` to determine the best answer. Ensure that your response is clear and concise, directly addressing the question without any additional commentary or reasoning. Validate your answer against known correct solutions, considering explicit criteria for \"significant change\" and distinguishing between different types of perturbations before finalizing it.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. 
Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None, 'tools': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-05 09:43:29.409\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.042 | Total tokens: 228760 | Current cost: $0.001 | Current tokens: 5173\u001b[0m\n", "\u001b[32m2026-01-05 09:43:29.885\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.042 | Total tokens: 228857 | Current cost: $0.000 | Current tokens: 97\u001b[0m\n", "\u001b[32m2026-01-05 09:43:30.601\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.042 | Total tokens: 229498 | Current cost: $0.000 | Current tokens: 641\u001b[0m\n", "{'name': 'validate_predictions7073', 'description': 'Task to validate_predictions7073. Takes question as input. Produces validated_answer as output.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'Input parameter question for validate_predictions7073', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_predictions7073', 'required': True}], 'prompt': '```xml\\nINSTRUCTION for the 1-th task:\\n\"\"\"\\nTo validate the predictions, first assess the provided {question} against established ground-truth solutions. Determine if the predictions reflect significant changes based on explicit criteria, such as a predefined threshold for accuracy or relevance. 
Document your reasoning in the \\'thought\\' field, explaining how the predictions align or diverge from the expected outcomes. Provide the validated answer in the \\'answer\\' field, ensuring clarity and conciseness.\\nFormat your output in xml format, such as xxx and xxx.\\n\"\"\"\\n```', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None, 'tools': None}\n", "\u001b[32m2026-01-05 09:43:32.374\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.043 | Total tokens: 234712 | Current cost: $0.001 | Current tokens: 5214\u001b[0m\n", "\u001b[32m2026-01-05 09:43:33.075\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.043 | Total tokens: 234800 | Current cost: $0.000 | Current tokens: 88\u001b[0m\n", "\u001b[32m2026-01-05 09:43:34.780\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.043 | Total tokens: 235531 | Current cost: $0.000 | Current tokens: 731\u001b[0m\n", "\u001b[32m2026-01-05 09:43:34.781\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n", "\u001b[32m2026-01-05 09:43:34.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1054\u001b[0m - \u001b[1mEvaluate the workflow at step 4 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:00<00:41, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:01<00:36, 1.31it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 [00:02<00:33, 1.40it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:02<00:32, 1.42it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:03<00:36, 1.24it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:04<00:36, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:05<00:34, 1.24it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 
[00:06<00:34, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:07<00:32, 1.25it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:08<00:33, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:09<00:35, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:10<00:34, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:11<00:34, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:11<00:31, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:12<00:29, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:13<00:28, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" 
] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:14<00:26, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:14<00:24, 1.29it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:15<00:23, 1.31it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:16<00:23, 1.29it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:17<00:22, 1.30it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:17<00:21, 1.30it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:18<00:19, 1.37it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:19<00:19, 1.35it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 
[00:20<00:19, 1.30it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:20<00:18, 1.32it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:21<00:19, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:22<00:20, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:23<00:17, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:24<00:17, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:25<00:16, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:26<00:15, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:27<00:14, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 
0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:28<00:13, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:28<00:12, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:29<00:11, 1.25it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 37/50 [00:30<00:10, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:30<00:09, 1.28it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:31<00:08, 1.28it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:32<00:07, 1.28it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:33<00:07, 1.26it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:34<00:06, 1.28it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:34<00:05, 1.29it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:35<00:04, 1.33it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:36<00:03, 1.35it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:37<00:02, 1.35it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:37<00:02, 1.38it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:38<00:01, 1.28it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:39<00:00, 1.29it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:40<00:00, 1.24it/s]" ] }, { 
"name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:44:14.952\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1057\u001b[0m - \u001b[1mStep 4 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.84}\u001b[0m\n", "randomly update dataset\n", "\u001b[32m2026-01-05 09:44:14.954\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['validate_predictions7073', 'generate_answer']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-05 09:44:17.095\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.049 | Total tokens: 271348 | Current cost: $0.001 | Current tokens: 5139\u001b[0m\n", "- The workflow lacks a validation step after generating the answer, which could help identify inconsistencies between predictions and actual solutions.\n", "- There is no error handling or logging mechanism to address incorrect computation results, leading to multiple instances of incorrect solutions without any corrective action.\n", "- The workflow assumes that all questions will yield a straightforward 'Yes' or 'No' answer without considering the complexity of gene expression data, which may not always fit this binary format.\n", "- The execution history shows multiple instances where the predictions and solutions do not align, indicating a failure to accurately compute or interpret the results.\n", "- The workflow does not account for the possibility of ambiguous or unclear questions, which could lead to misinterpretation and incorrect answers.\n", 
"\u001b[32m2026-01-05 09:44:17.727\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.050 | Total tokens: 271974 | Current cost: $0.000 | Current tokens: 626\u001b[0m\n", "```python\n", "steps = [\n", " {'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", " {'name': 'validate_predictions7073', 'args': ['question'], 'outputs': ['validated_answer']}\n", "]\n", "```\n", "\u001b[32m2026-01-05 09:44:17.729\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n", "Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which AIFM1 is perturbed and MIEN1 expression is observed. Does this perturbation lead to a significant difference in MIEN1 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to RPP21 and then measure expression of NUPR1. Does this perturbation cause a significant change in NUPR1 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of TIMM13 is associated with a significant change in TIMM13 expression compared with unperturbed controls. 
Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TPR is perturbed and the expression of VSIR is measured. Determine whether VSIR shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, FARS2 is perturbed and GMEB1 expression is quantified. Does this perturbation result in a significant change in GMEB1 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, BRIX1 is perturbed and the expression of STEAP1 is measured. Determine whether STEAP1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb NUP62 and examine the expression of PRC1. Does perturbing NUP62 lead to a significant change in PRC1 expression? 
Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, WDR12 is perturbed and the expression of NUSAP1 is measured. Does this perturbation cause a significant change in NUSAP1 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb VMP1 and examine the expression of GCNT2. Does perturbing VMP1 lead to a significant change in GCNT2 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, CHMP5 is perturbed and the expression of CXCL8 is measured. Determine whether CXCL8 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, PALB2 is perturbed and POLQ expression is quantified. Does this perturbation result in a significant change in POLQ expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. 
Consider data from K562 cells in which DIS3 is perturbed and MTHFD1 expression is observed. Does this perturbation lead to a significant difference in MTHFD1 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of PSMA2 is associated with a significant change in SGO1 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb RSL1D1 and monitor RSL1D1 expression. Decide whether this perturbation leads to a significant alteration in RSL1D1 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which GTF3C3 is perturbed and RPL22L1 expression is observed. Does this perturbation lead to a significant difference in RPL22L1 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, GSK3B is perturbed and the expression of SLC38A1 is measured. Does this perturbation cause a significant change in SLC38A1 expression? 
Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, PALB2 is perturbed and the expression of POLQ is measured. Does this perturbation cause a significant change in POLQ expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of UBA2 is associated with a significant change in UBA2 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb UBA5 and monitor DCBLD2 expression. Decide whether this perturbation leads to a significant alteration in DCBLD2 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SPCS2 is perturbed and SPCS2 expression is quantified. Does this perturbation result in a significant change in SPCS2 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. 
In K562 cells, PRKRIP1 is perturbed and PDIA4 expression is quantified. Does this perturbation result in a significant change in PDIA4 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of LSM10, does the expression profile of HIST1H1C indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to ACTR2 and then measure expression of ACTR2. Does this perturbation cause a significant change in ACTR2 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, EIF4E is perturbed and the expression of CENPW is measured. Determine whether CENPW shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb PRC1 and monitor PRC1 expression. Decide whether this perturbation leads to a significant alteration in PRC1 expression. 
Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to RPL36 and then measure expression of RPL36. Does this perturbation cause a significant change in RPL36 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to RPL29 and then measure expression of RPL29. Does this perturbation cause a significant change in RPL29 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb PIK3R4 and examine the expression of CD9. Does perturbing PIK3R4 lead to a significant change in CD9 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SDHAF2 is perturbed and the expression of SDHAF2 is measured. Determine whether SDHAF2 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. 
For experiments carried out in K562 cells, we perturb PRDM8 and examine the expression of BRI3. Does perturbing PRDM8 lead to a significant change in BRI3 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb HSPE1 and monitor HSPD1 expression. Decide whether this perturbation leads to a significant alteration in HSPD1 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SRSF3 is perturbed and XPO4 expression is measured. Determine whether XPO4 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, PSMB4 is perturbed and MAP1A expression is quantified. Does this perturbation result in a significant change in MAP1A expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of PSMB7 is associated with a significant change in EEF1AKMT1 expression compared with unperturbed controls. 
Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb RPL34 and monitor SKA3 expression. Decide whether this perturbation leads to a significant alteration in SKA3 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, PCNX3 is perturbed and the expression of ZIM3 is measured. Does this perturbation cause a significant change in ZIM3 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, DHPS is perturbed and POLRMT expression is measured. Determine whether POLRMT exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, RNMT is perturbed and SNHG8 expression is measured. Determine whether SNHG8 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. 
In K562 cells, we introduce a perturbation to EXOSC8 and then measure expression of POLA1. Does this perturbation cause a significant change in POLA1 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which POLR1A is perturbed and CENPE expression is observed. Does this perturbation lead to a significant difference in CENPE expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of DNAJC17 is associated with a significant change in SYNE2 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of GTF3C4, does the expression profile of COPS9 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb EIF3D and monitor TK1 expression. Decide whether this perturbation leads to a significant alteration in TK1 expression. 
Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SEM1 is perturbed and BIRC5 expression is measured. Determine whether BIRC5 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, PSMA6 is perturbed and the expression of SRPX is measured. Determine whether SRPX shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, DIMT1 is perturbed and NEK2 expression is measured. Determine whether NEK2 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, LSM12 is perturbed and the expression of TGOLN2 is measured. Determine whether TGOLN2 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: No\n", "Solutions: Yes\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in regulatory genomics. 
Consider data from K562 cells in which MRPL27 is perturbed and MT-ND2 expression is observed. Does this perturbation lead to a significant difference in MT-ND2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb ZNF24 and examine the expression of ZNF24. Does perturbing ZNF24 lead to a significant change in ZNF24 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: No\n", "Solutions: Yes\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, JAZF1 is perturbed and MYCBP2 expression is measured. Determine whether MYCBP2 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': '\"\"\"\\nUse the context provided in `{question}` to determine the best answer. Ensure that your response is clear and concise, directly addressing the question without any additional commentary or reasoning. 
Validate your answer against known correct solutions, considering explicit criteria for \"significant change\" and distinguishing between different types of perturbations before finalizing it.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None, 'tools': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-05 09:44:19.669\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.050 | Total tokens: 277143 | Current cost: $0.001 | Current tokens: 5169\u001b[0m\n", "\u001b[32m2026-01-05 09:44:20.471\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.050 | Total tokens: 277247 | Current cost: $0.000 | Current tokens: 104\u001b[0m\n", "\u001b[32m2026-01-05 09:44:21.286\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.051 | Total tokens: 277964 | Current cost: $0.000 | Current tokens: 717\u001b[0m\n", "{'name': 'validate_predictions7073', 'description': 'Task to validate_predictions7073. Takes question as input. 
Produces validated_answer as output.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'Input parameter question for validate_predictions7073', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_predictions7073', 'required': True}], 'prompt': '```xml\\nINSTRUCTION for the 1-th task:\\n\"\"\"\\nTo validate the predictions, first assess the provided {question} against established ground-truth solutions. Determine if the predictions reflect significant changes based on explicit criteria, such as a predefined threshold for accuracy or relevance. Document your reasoning in the \\'thought\\' field, explaining how the predictions align or diverge from the expected outcomes. Provide the validated answer in the \\'answer\\' field, ensuring clarity and conciseness.\\nFormat your output in xml format, such as xxx and xxx.\\n\"\"\"\\n```', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None, 'tools': None}\n", "\u001b[32m2026-01-05 09:44:23.970\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.052 | Total tokens: 283196 | Current cost: $0.001 | Current tokens: 5232\u001b[0m\n", "\u001b[32m2026-01-05 09:44:24.601\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.052 | Total tokens: 283292 | Current cost: $0.000 | Current tokens: 96\u001b[0m\n", "\u001b[32m2026-01-05 09:44:26.220\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.052 | Total tokens: 284213 | Current cost: $0.000 | Current tokens: 921\u001b[0m\n", "\u001b[32m2026-01-05 09:44:26.221\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n", "\u001b[32m2026-01-05 09:44:26.222\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1054\u001b[0m - \u001b[1mEvaluate the workflow at step 5 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:00<00:34, 1.40it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:01<00:35, 1.34it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 [00:02<00:33, 1.40it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:02<00:33, 1.39it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:03<00:35, 1.26it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:04<00:33, 1.33it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:05<00:34, 1.25it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:06<00:33, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:06<00:32, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:07<00:29, 1.35it/s]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:08<00:29, 1.34it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:09<00:29, 1.29it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:09<00:28, 1.29it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:10<00:28, 1.26it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:11<00:27, 1.29it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:12<00:27, 1.26it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:13<00:26, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:13<00:24, 1.29it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:14<00:24, 1.26it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:15<00:24, 1.24it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:16<00:27, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:17<00:26, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:18<00:24, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:19<00:24, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:20<00:24, 1.01it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:21<00:22, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:22<00:20, 1.12it/s]" ] }, 
{ "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:23<00:20, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:24<00:18, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:24<00:16, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:25<00:15, 1.25it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:26<00:15, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:27<00:15, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:28<00:13, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:28<00:11, 1.25it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:29<00:10, 1.34it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 37/50 [00:30<00:09, 1.37it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:31<00:09, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:32<00:09, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:33<00:08, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:33<00:07, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:34<00:06, 1.24it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:35<00:06, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", 
"Evaluating workflow: 88%|████████▊ | 44/50 [00:36<00:05, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:37<00:04, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:38<00:03, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:39<00:02, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:39<00:01, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:40<00:00, 1.24it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:41<00:00, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:45:07.862\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1057\u001b[0m - \u001b[1mStep 5 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.84}\u001b[0m\n", "randomly update dataset\n", "\u001b[32m2026-01-05 09:45:07.864\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['validate_predictions7073', 'generate_answer']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-05 09:45:10.229\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.058 | Total tokens: 320058 | Current cost: $0.001 | Current tokens: 5153\u001b[0m\n", "- The workflow lacks a validation step after generating the answer, which could help ensure the accuracy of the predictions before finalizing the output.\n", "- There is an assumption that all questions can be answered with a simple 'Yes' or 'No' without considering the complexity of the underlying biological data, which may not always be valid.\n", "- The workflow does not account for potential contradictions in the predictions and solutions, as evidenced by multiple instances where the predictions were correct but the solutions were incorrect.\n", "- The control flow does not handle cases where the predictions and solutions diverge, leading to a lack of clarity on how to address discrepancies.\n", "- The execution history reveals multiple instances of incorrect computation results, indicating a systemic issue in the underlying model or data processing that is not addressed in the workflow.\n", "\u001b[32m2026-01-05 09:45:10.830\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.058 | Total tokens: 320696 | Current cost: $0.000 | Current tokens: 638\u001b[0m\n", "```python\n", "steps = [\n", "{'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", "{'name': 'validate_predictions7073', 
'args': ['question'], 'outputs': ['validated_answer']}\n", "]\n", "```\n", "\u001b[32m2026-01-05 09:45:10.832\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n", "Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which METAP1 is perturbed and METAP1 expression is observed. Does this perturbation lead to a significant difference in METAP1 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to CCT3 and then measure expression of CDCA8. Does this perturbation cause a significant change in CDCA8 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of WEE1 is associated with a significant change in CDKN1A expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of RPAP3 is associated with a significant change in CEP41 expression compared with unperturbed controls. 
Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SFSWAP is perturbed and ATAD5 expression is measured. Determine whether ATAD5 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of MTHFD1, does the expression profile of MND1 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to ARF4 and then measure expression of ARF4. Does this perturbation cause a significant change in ARF4 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb CCAR1 and examine the expression of EFEMP2. Does perturbing CCAR1 lead to a significant change in EFEMP2 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. 
Consider data from K562 cells in which CTCF is perturbed and QRICH1 expression is observed. Does this perturbation lead to a significant difference in QRICH1 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, COA5 is perturbed and COA5 expression is quantified. Does this perturbation result in a significant change in COA5 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which WBP1 is perturbed and WBP1 expression is observed. Does this perturbation lead to a significant difference in WBP1 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to PSMD11 and then measure expression of CHST14. Does this perturbation cause a significant change in CHST14 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, CSNK2B is perturbed and SELENOM expression is quantified. 
Does this perturbation result in a significant change in SELENOM expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SRRT, does the expression profile of UQCC2 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, HSPA14 is perturbed and S100A13 expression is measured. Determine whether S100A13 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TAF10 is perturbed and COQ3 expression is observed. Does this perturbation lead to a significant difference in COQ3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of LRP5, does the expression profile of RPS27 indicate a significant change relative to control conditions? 
Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, TMEM242 is perturbed and TMEM242 expression is quantified. Does this perturbation result in a significant change in TMEM242 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb CINP and monitor FANCA expression. Decide whether this perturbation leads to a significant alteration in FANCA expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, ANAPC4 is perturbed and PSMC3IP expression is quantified. Does this perturbation result in a significant change in PSMC3IP expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of KARS, does the expression profile of DSN1 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. 
For K562 cells, assess whether perturbation of PSMB3 is associated with a significant change in GABARAPL1 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of HMGB3, does the expression profile of METRN indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SMC6 and examine the expression of LMNB1. Does perturbing SMC6 lead to a significant change in LMNB1 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, YPEL5 is perturbed and DNAJB12 expression is quantified. Does this perturbation result in a significant change in DNAJB12 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, FARSA is perturbed and SHCBP1 expression is measured. Determine whether SHCBP1 exhibits a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb PTEN and monitor MYO5A expression. Decide whether this perturbation leads to a significant alteration in MYO5A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which NCAPD2 is perturbed and NCAPD2 expression is observed. Does this perturbation lead to a significant difference in NCAPD2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, CDC23 is perturbed and the expression of MCM10 is measured. Does this perturbation cause a significant change in MCM10 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, ANAPC10 is perturbed and the expression of ANAPC10 is measured. Does this perturbation cause a significant change in ANAPC10 expression? 
Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: No\n", "Solutions: Yes\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, HMGCR is perturbed and HMGCR expression is quantified. Does this perturbation result in a significant change in HMGCR expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb NAPG and monitor TUBA1C expression. Decide whether this perturbation leads to a significant alteration in TUBA1C expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, RPL37A is perturbed and RPL37A expression is quantified. Does this perturbation result in a significant change in RPL37A expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, EXOC3 is perturbed and ADRB2 expression is quantified. Does this perturbation result in a significant change in ADRB2 expression compared with control cells? 
Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, CBLL1 is perturbed and the expression of LGALS3 is measured. Determine whether LGALS3 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, PPP1CA is perturbed and FST expression is measured. Determine whether FST exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which MED16 is perturbed and SOX11 expression is observed. Does this perturbation lead to a significant difference in SOX11 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, MRPS25 is perturbed and MT-CO1 expression is measured. Determine whether MT-CO1 exhibits a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, TBC1D1 is perturbed and the expression of PSMC3IP is measured. Determine whether PSMC3IP shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, NUP35 is perturbed and the expression of STPG1 is measured. Does this perturbation cause a significant change in STPG1 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to PPIL4 and then measure expression of ZMAT3. Does this perturbation cause a significant change in ZMAT3 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to RRP7A and then measure expression of RFC4. Does this perturbation cause a significant change in RFC4 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. 
For K562 cells, assess whether perturbation of TRMT6 is associated with a significant change in PCMT1 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to EIF2S1 and then measure expression of MYBL2. Does this perturbation cause a significant change in MYBL2 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, PPP1CB is perturbed and the expression of NR2C2AP is measured. Determine whether NR2C2AP shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb EIF5 and examine the expression of NEGR1. Does perturbing EIF5 lead to a significant change in NEGR1 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, POLG is perturbed and POLG expression is quantified. Does this perturbation result in a significant change in POLG expression compared with control cells? 
Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb BCS1L and examine the expression of PTGES2. Does perturbing BCS1L lead to a significant change in PTGES2 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to HEATR1 and then measure expression of ARHGAP11A. Does this perturbation cause a significant change in ARHGAP11A expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb EIF2B1 and examine the expression of TYMS. Does perturbing EIF2B1 lead to a significant change in TYMS expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': '\"\"\"\\nUse the context provided in `{question}` to determine the best answer. 
Ensure that your response is clear and concise, directly addressing the question without any additional commentary or reasoning. Validate your answer against known correct solutions, considering explicit criteria for \"significant change\" and distinguishing between different types of perturbations before finalizing it.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None, 'tools': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-05 09:45:12.459\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.059 | Total tokens: 325886 | Current cost: $0.001 | Current tokens: 5190\u001b[0m\n", "\u001b[32m2026-01-05 09:45:13.483\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.059 | Total tokens: 325996 | Current cost: $0.000 | Current tokens: 110\u001b[0m\n", "\u001b[32m2026-01-05 09:45:14.504\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.060 | Total tokens: 326794 | Current cost: $0.000 | Current tokens: 798\u001b[0m\n", "{'name': 'validate_predictions7073', 'description': 'Task to validate_predictions7073. Takes question as input. 
Produces validated_answer as output.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'Input parameter question for validate_predictions7073', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_predictions7073', 'required': True}], 'prompt': '```xml\\nINSTRUCTION for the 1-th task:\\n\"\"\"\\nTo validate the predictions, first assess the provided {question} against established ground-truth solutions. Determine if the predictions reflect significant changes based on explicit criteria, such as a predefined threshold for accuracy or relevance. Document your reasoning in the \\'thought\\' field, explaining how the predictions align or diverge from the expected outcomes. Provide the validated answer in the \\'answer\\' field, ensuring clarity and conciseness.\\nFormat your output in xml format, such as xxx and xxx.\\n\"\"\"\\n```', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. 
You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None, 'tools': None}\n", "\u001b[32m2026-01-05 09:45:16.212\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.060 | Total tokens: 332038 | Current cost: $0.001 | Current tokens: 5244\u001b[0m\n", "\u001b[32m2026-01-05 09:45:16.980\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.060 | Total tokens: 332147 | Current cost: $0.000 | Current tokens: 109\u001b[0m\n", "\u001b[32m2026-01-05 09:45:18.710\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.061 | Total tokens: 333257 | Current cost: $0.000 | Current tokens: 1110\u001b[0m\n", "\u001b[32m2026-01-05 09:45:18.711\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n", "\u001b[32m2026-01-05 09:45:18.711\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1054\u001b[0m - \u001b[1mEvaluate the workflow at step 6 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:00<00:41, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:01<00:47, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 [00:02<00:44, 1.05it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:04<00:56, 1.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:05<00:46, 1.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:05<00:39, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:06<00:37, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:07<00:33, 1.24it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:08<00:31, 1.29it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:09<00:34, 1.17it/s]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:09<00:32, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:10<00:31, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:11<00:28, 1.29it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:12<00:28, 1.26it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:12<00:26, 1.33it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:13<00:28, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:14<00:28, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:15<00:28, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:16<00:26, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:17<00:27, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:18<00:27, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:19<00:24, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:20<00:23, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:20<00:22, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:21<00:20, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:22<00:20, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:23<00:18, 1.22it/s]" ] }, 
{ "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:23<00:16, 1.32it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:24<00:16, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:25<00:17, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:26<00:16, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:27<00:14, 1.26it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:27<00:12, 1.33it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:28<00:11, 1.34it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:29<00:11, 1.34it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:30<00:10, 1.37it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 37/50 [00:30<00:09, 1.37it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:31<00:09, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:32<00:09, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:33<00:08, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:34<00:07, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:35<00:06, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:35<00:05, 1.25it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", 
"Evaluating workflow: 88%|████████▊ | 44/50 [00:36<00:04, 1.31it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:37<00:03, 1.26it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:38<00:03, 1.29it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:39<00:02, 1.28it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:39<00:01, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:40<00:00, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:41<00:00, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:46:00.293\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1057\u001b[0m - \u001b[1mStep 6 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.88}\u001b[0m\n", "randomly update dataset\n", "\u001b[32m2026-01-05 09:46:00.295\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-05 09:46:02.064\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.067 | Total tokens: 369027 | Current cost: $0.001 | Current tokens: 5149\u001b[0m\n", "- The workflow lacks a validation step to ensure the correctness of the generated answer before final output, leading to multiple instances of incorrect solutions.\n", "- There is an absence of error handling or mechanisms to address cases where the predictions do not match the solutions, resulting in unhandled discrepancies.\n", "- The control flow does not account for the possibility of generating an answer that contradicts the validation step, leading to inconsistencies in the final outputs.\n", "- The workflow assumes that all questions can be answered with a simple 'Yes' or 'No' without considering the complexity of the underlying data, which may not always be valid.\n", "- The execution history reveals multiple instances where the predictions were incorrect, indicating a systematic flaw in the answer generation process that is not addressed in the workflow.\n", "\u001b[32m2026-01-05 09:46:02.986\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.067 | Total tokens: 369661 | Current cost: $0.000 | Current tokens: 634\u001b[0m\n", "```python\n", "steps = [\n", "{'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", "{'name': 'validate_predictions7073', 'args': ['answer'], 'outputs': 
['validated_answer']}\n", "]\n", "```\n", "\u001b[32m2026-01-05 09:46:02.988\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n", "Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of MAK16, does the expression profile of CDKN3 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, RNF4 is perturbed and ZNF585A expression is measured. Determine whether ZNF585A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of WDR77, does the expression profile of PYGL indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of INTS7, does the expression profile of CDCA3 indicate a significant change relative to control conditions? 
Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of ORC3 is associated with a significant change in SFR1 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb DDX17 and monitor ANKRD1 expression. Decide whether this perturbation leads to a significant alteration in ANKRD1 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to ELOB and then measure expression of CA9. Does this perturbation cause a significant change in CA9 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb ELOB and examine the expression of ELOB. Does perturbing ELOB lead to a significant change in ELOB expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, FCF1 is perturbed and the expression of GTSE1 is measured. 
Determine whether GTSE1 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, ATP2A2 is perturbed and the expression of DDX17 is measured. Determine whether DDX17 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb FBRSL1 and examine the expression of CDKN1A. Does perturbing FBRSL1 lead to a significant change in CDKN1A expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, RPL27A is perturbed and the expression of CDKN1A is measured. Does this perturbation cause a significant change in CDKN1A expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb PPP1R15B and monitor PCBD1 expression. Decide whether this perturbation leads to a significant alteration in PCBD1 expression. 
Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb IMPA2 and examine the expression of EMG1. Does perturbing IMPA2 lead to a significant change in EMG1 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of HSPE1 is associated with a significant change in HSPE1 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to CEP97 and then measure expression of CEP97. Does this perturbation cause a significant change in CEP97 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb TMEM127 and examine the expression of CIAO1. Does perturbing TMEM127 lead to a significant change in CIAO1 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. 
For experiments carried out in K562 cells, we perturb RNASEH2C and examine the expression of RNASEH2C. Does perturbing RNASEH2C lead to a significant change in RNASEH2C expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to DAD1 and then measure expression of POLE. Does this perturbation cause a significant change in POLE expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb EMC1 and examine the expression of NEK2. Does perturbing EMC1 lead to a significant change in NEK2 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of EXOSC2 is associated with a significant change in IMPA2 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, PFN1 is perturbed and the expression of IL1B is measured. Determine whether IL1B shows a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of EWSR1 is associated with a significant change in EWSR1 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, TAF2 is perturbed and the expression of PCOLCE2 is measured. Does this perturbation cause a significant change in PCOLCE2 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, NSRP1 is perturbed and the expression of PODXL2 is measured. Determine whether PODXL2 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb YKT6 and monitor SGO1 expression. Decide whether this perturbation leads to a significant alteration in SGO1 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. 
For experiments carried out in K562 cells, we perturb NUBP2 and examine the expression of CDKN1A. Does perturbing NUBP2 lead to a significant change in CDKN1A expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, EDC4 is perturbed and the expression of ZMAT3 is measured. Does this perturbation cause a significant change in ZMAT3 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to PSMB1 and then measure expression of NRGN. Does this perturbation cause a significant change in NRGN expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb ZMAT2 and monitor CDKN1A expression. Decide whether this perturbation leads to a significant alteration in CDKN1A expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb MRTO4 and monitor POLA2 expression. Decide whether this perturbation leads to a significant alteration in POLA2 expression. 
Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SEH1L and examine the expression of H2AFZ. Does perturbing SEH1L lead to a significant change in H2AFZ expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, CWC25 is perturbed and CWC25 expression is measured. Determine whether CWC25 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of WAC is associated with a significant change in WAC expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SRSF7 and then measure expression of RRM2. Does this perturbation cause a significant change in RRM2 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. 
For K562 cells, assess whether perturbation of SDAD1 is associated with a significant change in DOCK10 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of ZNF574, does the expression profile of SELENOM indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of CCT7, does the expression profile of CDC45 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SUDS3 is perturbed and THY1 expression is measured. Determine whether THY1 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SNW1, does the expression profile of HIST3H2A indicate a significant change relative to control conditions? 
Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb SDE2 and monitor IL1B expression. Decide whether this perturbation leads to a significant alteration in IL1B expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PPME1 is perturbed and PPME1 expression is observed. Does this perturbation lead to a significant difference in PPME1 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SEC61A1 and then measure expression of SEC61A1. Does this perturbation cause a significant change in SEC61A1 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb PRKCA and examine the expression of THY1. Does perturbing PRKCA lead to a significant change in THY1 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. 
In K562 cells, PSMD7 is perturbed and the expression of TRIM7 is measured. Determine whether TRIM7 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, FKBP9 is perturbed and ZNF431 expression is quantified. Does this perturbation result in a significant change in ZNF431 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of WDR25 is associated with a significant change in LCLAT1 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, RPL23A is perturbed and ATAD5 expression is measured. Determine whether ATAD5 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to MRPL2 and then measure expression of MT-ND3. Does this perturbation cause a significant change in MT-ND3 expression? 
Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, CS is perturbed and the expression of TGM2 is measured. Does this perturbation cause a significant change in TGM2 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': '\"\"\"\\nUse the context provided in `{question}` to determine the best answer. Ensure that your response is clear and concise, directly addressing the question without any additional commentary or reasoning. If the answer is uncertain or ambiguous, indicate this clearly. Validate your answer against known correct solutions before finalizing it, documenting any discrepancies with the ground truth.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. 
Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None, 'tools': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-05 09:46:04.593\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.068 | Total tokens: 374748 | Current cost: $0.001 | Current tokens: 5087\u001b[0m\n", "\u001b[32m2026-01-05 09:46:05.365\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.068 | Total tokens: 374854 | Current cost: $0.000 | Current tokens: 106\u001b[0m\n", "\u001b[32m2026-01-05 09:46:06.404\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.068 | Total tokens: 375702 | Current cost: $0.000 | Current tokens: 848\u001b[0m\n", "{'name': 'validate_predictions7073', 'description': 'Task to validate_predictions7073. Takes question as input. Produces validated_answer as output.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'Input parameter question for validate_predictions7073', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_predictions7073', 'required': True}], 'prompt': '```xml\\nINSTRUCTION for the 2-th task:\\n\"\"\"\\nTo validate the predictions, first assess the provided {question} against established ground-truth solutions. Determine if the predictions reflect significant changes based on explicit criteria, such as a predefined threshold for accuracy or relevance. 
If discrepancies arise between the predictions and ground-truth, clarify the nature of these discrepancies and their implications for the assessment. Document your reasoning in the \\'thought\\' field, explaining how the predictions align or diverge from the expected outcomes, and whether the perturbation led to significant changes or had no effect. Provide the validated answer in the \\'answer\\' field, ensuring clarity and conciseness. If the answer is uncertain or ambiguous, indicate this clearly in the \\'answer\\' field.\\nFormat your output in xml format, such as xxx and xxx.\\n\"\"\"\\n```', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None, 'tools': None}\n", "\u001b[32m2026-01-05 09:46:08.386\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.069 | Total tokens: 380848 | Current cost: $0.001 | Current tokens: 5146\u001b[0m\n", "\u001b[32m2026-01-05 09:46:08.853\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.069 | Total tokens: 380939 | Current cost: $0.000 | Current tokens: 91\u001b[0m\n", "\u001b[32m2026-01-05 09:46:10.509\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.070 | Total tokens: 382168 | Current cost: $0.000 | Current tokens: 1229\u001b[0m\n", 
"\u001b[32m2026-01-05 09:46:10.510\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n", "\u001b[32m2026-01-05 09:46:10.511\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1054\u001b[0m - \u001b[1mEvaluate the workflow at step 7 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:00<00:37, 1.31it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:01<00:35, 1.37it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 [00:02<00:35, 1.34it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:02<00:34, 1.35it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:03<00:32, 1.37it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:04<00:34, 1.26it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:05<00:36, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:06<00:34, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:07<00:32, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:07<00:31, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:08<00:31, 1.25it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:10<00:39, 1.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:10<00:35, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:11<00:31, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:12<00:29, 1.20it/s]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:13<00:27, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:13<00:25, 1.28it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:14<00:25, 1.24it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:15<00:24, 1.25it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:16<00:24, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:17<00:26, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:18<00:24, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:19<00:25, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:20<00:23, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:20<00:21, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:21<00:21, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:22<00:19, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:23<00:18, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:24<00:17, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:25<00:16, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:25<00:15, 1.26it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:26<00:14, 1.26it/s]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:27<00:13, 1.30it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:28<00:13, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:29<00:12, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:30<00:11, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 37/50 [00:30<00:11, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:31<00:10, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:32<00:09, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:33<00:07, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" 
] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:33<00:06, 1.29it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:34<00:06, 1.28it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:35<00:05, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:36<00:05, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:37<00:04, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:38<00:03, 1.24it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:39<00:02, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:40<00:02, 1.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating 
workflow: 98%|█████████▊| 49/50 [00:41<00:00, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:42<00:00, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:46:52.754\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1057\u001b[0m - \u001b[1mStep 7 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.88}\u001b[0m\n", "randomly update dataset\n", "\u001b[32m2026-01-05 09:46:52.756\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-05 09:46:54.504\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.076 | Total tokens: 418003 | Current cost: $0.001 | Current tokens: 5145\u001b[0m\n", "- The workflow lacks a validation step after generating the answer, which could help ensure the accuracy of predictions before finalizing the output.\n", "- There are multiple instances of incorrect predictions leading to incorrect solutions, indicating a failure in the computational logic or data handling within the workflow.\n", "- The workflow does not account for potential contradictions in the predictions and solutions, as evidenced by cases where the predicted answer is 'Yes' but the solution is 'No'.\n", "- The control flow does not include mechanisms to 
handle or report errors effectively, as seen in the numerous instances of incorrect computation results without any corrective action.\n", "- The prompt instructions for answering are not consistently followed, as some questions specify a strict format while others do not, leading to ambiguity in expected responses.\n", "\u001b[32m2026-01-05 09:46:55.503\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.076 | Total tokens: 418633 | Current cost: $0.000 | Current tokens: 630\u001b[0m\n", "```python\n", "steps = [\n", "{'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", "{'name': 'validate_predictions7073', 'args': ['answer'], 'outputs': ['validated_answer']}\n", "]\n", "```\n", "\u001b[32m2026-01-05 09:46:55.505\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n", "Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of PNN, does the expression profile of PNN indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb CCT8 and monitor TEAD4 expression. Decide whether this perturbation leads to a significant alteration in TEAD4 expression. 
Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to RPS12 and then measure expression of RBL1. Does this perturbation cause a significant change in RBL1 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb CSDE1 and monitor CSDE1 expression. Decide whether this perturbation leads to a significant alteration in CSDE1 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, TUBGCP5 is perturbed and the expression of CDKN1A is measured. Does this perturbation cause a significant change in CDKN1A expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TUBE1 is perturbed and ZMAT3 expression is observed. Does this perturbation lead to a significant difference in ZMAT3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. 
For experiments performed in K562 cells, AQR is perturbed and BRCA2 expression is measured. Determine whether BRCA2 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb PRDM4 and monitor MT-ND3 expression. Decide whether this perturbation leads to a significant alteration in MT-ND3 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb BYSL and examine the expression of RTKN2. Does perturbing BYSL lead to a significant change in RTKN2 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of MRPL27 is associated with a significant change in MT-ND2 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb NDUFA4 and monitor H1FX expression. Decide whether this perturbation leads to a significant alteration in H1FX expression. 
Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, PPRC1 is perturbed and the expression of SNHG3 is measured. Does this perturbation cause a significant change in SNHG3 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb SAP18 and monitor JADE1 expression. Decide whether this perturbation leads to a significant alteration in JADE1 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SUDS3 and examine the expression of NME4. Does perturbing SUDS3 lead to a significant change in NME4 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, PPRC1 is perturbed and SNHG3 expression is quantified. Does this perturbation result in a significant change in SNHG3 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. 
In K562 cells, ATF4 is perturbed and FEM1C expression is quantified. Does this perturbation result in a significant change in FEM1C expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, NOM1 is perturbed and PCLAF expression is quantified. Does this perturbation result in a significant change in PCLAF expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of HMGCR, does the expression profile of HMGCR indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SP2 is perturbed and the expression of AC018521.5 is measured. Determine whether AC018521.5 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb CNN2 and monitor GAMT expression. Decide whether this perturbation leads to a significant alteration in GAMT expression. 
Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of WDR43, does the expression profile of ZWINT indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which CNOT11 is perturbed and FHL2 expression is observed. Does this perturbation lead to a significant difference in FHL2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, PIK3C3 is perturbed and HMGCS1 expression is quantified. Does this perturbation result in a significant change in HMGCS1 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb OSTC and monitor OSTC expression. Decide whether this perturbation leads to a significant alteration in OSTC expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. 
For experiments carried out in K562 cells, we perturb TAF5 and examine the expression of TACC3. Does perturbing TAF5 lead to a significant change in TACC3 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to MRPL17 and then measure expression of MT-ND3. Does this perturbation cause a significant change in MT-ND3 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of RPA1 is associated with a significant change in TMSB15A expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which POLR1E is perturbed and PCLAF expression is observed. Does this perturbation lead to a significant difference in PCLAF expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, CSDE1 is perturbed and the expression of CSDE1 is measured. Determine whether CSDE1 shows a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, POLR2E is perturbed and the expression of POLR2E is measured. Does this perturbation cause a significant change in POLR2E expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb TEX10 and examine the expression of NEK2. Does perturbing TEX10 lead to a significant change in NEK2 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of SNW1, does the expression profile of ZWINT indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, TUBA1C is perturbed and TUBA1C expression is quantified. Does this perturbation result in a significant change in TUBA1C expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. 
For experiments performed in K562 cells, TEN1 is perturbed and SLC1A5 expression is measured. Determine whether SLC1A5 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, RPL7L1 is perturbed and RPL7L1 expression is measured. Determine whether RPL7L1 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, GINS1 is perturbed and the expression of CDKN1A is measured. Determine whether CDKN1A shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which MTPAP is perturbed and MT-ATP6 expression is observed. Does this perturbation lead to a significant difference in MT-ATP6 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which PRDM8 is perturbed and PLAC9 expression is observed. Does this perturbation lead to a significant difference in PLAC9 expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which MRPS35 is perturbed and MT-CO1 expression is observed. Does this perturbation lead to a significant difference in MT-CO1 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, RFC5 is perturbed and CDKN1A expression is measured. Determine whether CDKN1A exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb UCHL5 and monitor SNRNP40 expression. Decide whether this perturbation leads to a significant alteration in SNRNP40 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb TWISTNB and examine the expression of CCNB2. Does perturbing TWISTNB lead to a significant change in CCNB2 expression? 
Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SMG1 is perturbed and the expression of SNHG32 is measured. Determine whether SNHG32 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, GLRX5 is perturbed and GLRX5 expression is measured. Determine whether GLRX5 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to TMEM161B and then measure expression of IFT122. Does this perturbation cause a significant change in IFT122 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: No\n", "Solutions: No\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, CHTOP is perturbed and the expression of S100A13 is measured. Does this perturbation cause a significant change in S100A13 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. 
For K562 cells subjected to perturbation of RPS19BP1, does the expression profile of CLN6 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb SUPV3L1 and examine the expression of MTRNR2L8. Does perturbing SUPV3L1 lead to a significant change in MTRNR2L8 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb RTF1 and examine the expression of POC1A. Does perturbing RTF1 lead to a significant change in POC1A expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, ETF1 is perturbed and TRIB3 expression is measured. Determine whether TRIB3 exhibits a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': '\"\"\"\\nUse the context provided in `{question}` to determine the best answer. Ensure that your response is clear and concise, directly addressing the question without any additional commentary or reasoning. If the answer is uncertain or ambiguous, indicate this clearly. Validate your answer against known correct solutions before finalizing it, documenting any discrepancies with the ground truth.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. 
Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None, 'tools': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-05 09:46:57.076\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.077 | Total tokens: 423820 | Current cost: $0.001 | Current tokens: 5187\u001b[0m\n", "\u001b[32m2026-01-05 09:46:57.708\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.077 | Total tokens: 423916 | Current cost: $0.000 | Current tokens: 96\u001b[0m\n", "\u001b[32m2026-01-05 09:46:58.870\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.077 | Total tokens: 424862 | Current cost: $0.000 | Current tokens: 946\u001b[0m\n", "{'name': 'validate_predictions7073', 'description': 'Task to validate_predictions7073. Takes question as input. Produces validated_answer as output.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'Input parameter question for validate_predictions7073', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_predictions7073', 'required': True}], 'prompt': '```xml\\nINSTRUCTION for the 2-th task:\\n\"\"\"\\nTo validate the predictions, first assess the provided {question} against established ground-truth solutions. Determine if the predictions reflect significant changes based on explicit criteria, such as a predefined threshold for accuracy or relevance. 
If discrepancies arise between the predictions and ground-truth, clarify the nature of these discrepancies and their implications for the assessment. Document your reasoning in the \\'thought\\' field, explaining how the predictions align or diverge from the expected outcomes, and whether the perturbation led to significant changes or had no effect. Provide the validated answer in the \\'answer\\' field, ensuring clarity and conciseness. If the answer is uncertain or ambiguous, indicate this clearly in the \\'answer\\' field.\\nFormat your output in xml format, such as xxx and xxx.\\n\"\"\"\\n```', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None, 'tools': None}\n", "\u001b[32m2026-01-05 09:47:00.413\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.078 | Total tokens: 430078 | Current cost: $0.001 | Current tokens: 5216\u001b[0m\n", "\u001b[32m2026-01-05 09:47:00.949\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.078 | Total tokens: 430174 | Current cost: $0.000 | Current tokens: 96\u001b[0m\n", "\u001b[32m2026-01-05 09:47:02.662\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.079 | Total tokens: 431566 | Current cost: $0.000 | Current tokens: 1392\u001b[0m\n", 
"\u001b[32m2026-01-05 09:47:02.664\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n", "\u001b[32m2026-01-05 09:47:02.664\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1054\u001b[0m - \u001b[1mEvaluate the workflow at step 8 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:00<00:41, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:01<00:45, 1.06it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 [00:02<00:41, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:03<00:40, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:04<00:39, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:05<00:41, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:06<00:40, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:07<00:37, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:07<00:35, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:08<00:33, 1.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:09<00:33, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:10<00:31, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:11<00:31, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:12<00:30, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:12<00:28, 1.22it/s]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:13<00:26, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:14<00:24, 1.34it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:15<00:24, 1.32it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:15<00:24, 1.28it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:16<00:25, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:17<00:24, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:18<00:23, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:19<00:21, 1.24it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:20<00:23, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:21<00:20, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:21<00:18, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:22<00:17, 1.34it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:23<00:16, 1.31it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:23<00:15, 1.33it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:25<00:16, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:25<00:15, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:26<00:14, 1.27it/s]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:27<00:15, 1.13it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:28<00:14, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:29<00:12, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:29<00:10, 1.28it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 37/50 [00:30<00:10, 1.26it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:31<00:09, 1.31it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:32<00:08, 1.34it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:33<00:08, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" 
] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:33<00:07, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:34<00:06, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:35<00:05, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:36<00:04, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:37<00:03, 1.25it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:38<00:03, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:39<00:02, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:40<00:02, 1.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating 
workflow: 98%|█████████▊| 49/50 [00:41<00:00, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:42<00:00, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:47:44.711\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1057\u001b[0m - \u001b[1mStep 8 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.86}\u001b[0m\n", "randomly update dataset\n", "\u001b[32m2026-01-05 09:47:44.713\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-05 09:47:46.580\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.085 | Total tokens: 467399 | Current cost: $0.001 | Current tokens: 5170\u001b[0m\n", "- The workflow lacks a step for handling or addressing errors in predictions, leading to unhandled cases where incorrect solutions are reported without any corrective action.\n", "- There is no validation step to ensure that the generated answer aligns with the required format ('Final Answer: Yes' or 'Final Answer: No'), which could lead to inconsistencies in output.\n", "- The control flow does not account for the possibility of multiple questions being processed sequentially, which may lead to confusion or errors in tracking the context of each question.\n", "- 
The assumption that all questions can be answered with a simple 'Yes' or 'No' may not hold true for all cases, potentially leading to oversimplification of complex biological data.\n", "- The workflow does not include any mechanism for cross-referencing or verifying the accuracy of the predictions against established biological knowledge or datasets, which could result in misleading conclusions.\n", "\u001b[32m2026-01-05 09:47:47.706\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.085 | Total tokens: 468080 | Current cost: $0.000 | Current tokens: 681\u001b[0m\n", "```python\n", "steps = [\n", " {'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", " {'name': 'validate_predictions7073', 'args': ['answer'], 'outputs': ['validated_answer']},\n", " {'name': 'error_handling', 'args': ['validated_answer'], 'outputs': ['final_answer']}\n", "]\n", "```\n", "\u001b[32m2026-01-05 09:47:47.708\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer']\u001b[0m\n", "Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, NUP88 is perturbed and MKI67 expression is quantified. Does this perturbation result in a significant change in MKI67 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, CENPN is perturbed and CMC2 expression is quantified. 
Does this perturbation result in a significant change in CMC2 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, GPN2 is perturbed and the expression of C16orf74 is measured. Does this perturbation cause a significant change in C16orf74 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, NUTF2 is perturbed and NUTF2 expression is measured. Determine whether NUTF2 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of CHMP1A is associated with a significant change in CIT expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of PPIE is associated with a significant change in KIF15 expression compared with unperturbed controls. 
Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to EXOC5 and then measure expression of CLIC1. Does this perturbation cause a significant change in CLIC1 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, ZNRD1 is perturbed and FAM111B expression is quantified. Does this perturbation result in a significant change in FAM111B expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SHC1 and then measure expression of SHC1. Does this perturbation cause a significant change in SHC1 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, LUC7L3 is perturbed and the expression of CD2BP2 is measured. Does this perturbation cause a significant change in CD2BP2 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. 
In K562 cells, we introduce a perturbation to DCTN1 and then measure expression of DCTN1. Does this perturbation cause a significant change in DCTN1 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to RPS3A and then measure expression of XRCC2. Does this perturbation cause a significant change in XRCC2 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb IFITM2 and examine the expression of CHRNA5. Does perturbing IFITM2 lead to a significant change in CHRNA5 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of EXOSC2 is associated with a significant change in IMPA2 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb GTPBP4 and examine the expression of CDKN1A. Does perturbing GTPBP4 lead to a significant change in CDKN1A expression? 
Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, MIS12 is perturbed and STEAP1 expression is quantified. Does this perturbation result in a significant change in STEAP1 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of EHMT2, does the expression profile of LDHA indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which TUBA1C is perturbed and SMPD1 expression is observed. Does this perturbation lead to a significant difference in SMPD1 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to BUB1B and then measure expression of CDKN1A. Does this perturbation cause a significant change in CDKN1A expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. 
In K562 cells, we perturb EGLN2 and monitor LDHA expression. Decide whether this perturbation leads to a significant alteration in LDHA expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, GUCD1 is perturbed and the expression of SUGCT is measured. Determine whether SUGCT shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of SKA1 is associated with a significant change in SKA1 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of LAMTOR3 is associated with a significant change in PRDX1 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, GTF3C4 is perturbed and the expression of PICALM is measured. Determine whether PICALM shows a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, USP9X is perturbed and USP9X expression is measured. Determine whether USP9X exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which ATP5MD is perturbed and ATAD5 expression is observed. Does this perturbation lead to a significant difference in ATAD5 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to BRIX1 and then measure expression of STEAP1. Does this perturbation cause a significant change in STEAP1 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which HSP90B1 is perturbed and PDIA4 expression is observed. Does this perturbation lead to a significant difference in PDIA4 expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, TPR is perturbed and the expression of VSIR is measured. Does this perturbation cause a significant change in VSIR expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which RACK1 is perturbed and NCAPG expression is observed. Does this perturbation lead to a significant difference in NCAPG expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to CDK2 and then measure expression of UGGT2. Does this perturbation cause a significant change in UGGT2 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of UTP15, does the expression profile of CDCA3 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. 
In K562 cells, POT1 is perturbed and the expression of DEPDC1 is measured. Does this perturbation cause a significant change in DEPDC1 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to ARL4D and then measure expression of CHRDL1. Does this perturbation cause a significant change in CHRDL1 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, NACA is perturbed and the expression of RAD51 is measured. Does this perturbation cause a significant change in RAD51 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of CAD is associated with a significant change in CAD expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to ANKRD17 and then measure expression of NAPEPLD. Does this perturbation cause a significant change in NAPEPLD expression? 
Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of LARS, does the expression profile of DDIT3 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, CTCF is perturbed and the expression of SHROOM3 is measured. Determine whether SHROOM3 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, RPSA is perturbed and AURKA expression is measured. Determine whether AURKA exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, EFR3A is perturbed and the expression of EFR3A is measured. Determine whether EFR3A shows a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: No\n", "Solutions: Yes\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which BRK1 is perturbed and RTL10 expression is observed. Does this perturbation lead to a significant difference in RTL10 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of ITGB1BP1, does the expression profile of FKBP5 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, RPL34 is perturbed and SKA3 expression is quantified. Does this perturbation result in a significant change in SKA3 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, EIF3J is perturbed and the expression of EIF3J is measured. Determine whether EIF3J shows a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which CDK2 is perturbed and CDK2 expression is observed. Does this perturbation lead to a significant difference in CDK2 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which VHL is perturbed and PLAAT4 expression is observed. Does this perturbation lead to a significant difference in PLAAT4 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which NUF2 is perturbed and PIF1 expression is observed. Does this perturbation lead to a significant difference in PIF1 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of BCAR1, does the expression profile of C1QL1 indicate a significant change relative to control conditions? 
Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of PSMD6 is associated with a significant change in ITGBL1 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': '\"\"\"\\nUse the context provided in `{question}` to determine the best answer. Ensure that your response is clear and concise, directly addressing the question without any additional commentary or reasoning. If the answer is uncertain or ambiguous, indicate this clearly. Validate your answer against known correct solutions before finalizing it, documenting any discrepancies with the ground truth.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. 
Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None, 'tools': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-05 09:47:50.371\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.086 | Total tokens: 473268 | Current cost: $0.001 | Current tokens: 5188\u001b[0m\n", "\u001b[32m2026-01-05 09:47:50.996\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.086 | Total tokens: 473364 | Current cost: $0.000 | Current tokens: 96\u001b[0m\n", "\u001b[32m2026-01-05 09:47:52.081\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.086 | Total tokens: 474412 | Current cost: $0.000 | Current tokens: 1048\u001b[0m\n", "{'name': 'validate_predictions7073', 'description': 'Task to validate_predictions7073. Takes question as input. Produces validated_answer as output.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'Input parameter question for validate_predictions7073', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_predictions7073', 'required': True}], 'prompt': '```xml\\nINSTRUCTION for the 2-th task:\\n\"\"\"\\nTo validate the predictions, first assess the provided {question} against established ground-truth solutions. Determine if the predictions reflect significant changes based on explicit criteria, such as a predefined threshold for accuracy or relevance. 
If discrepancies arise between the predictions and ground-truth, clarify the nature of these discrepancies and their implications for the assessment. Document your reasoning in the \\'thought\\' field, explaining how the predictions align or diverge from the expected outcomes, and whether the perturbation led to significant changes or had no effect. Provide the validated answer in the \\'answer\\' field, ensuring clarity and conciseness. If the answer is uncertain or ambiguous, indicate this clearly in the \\'answer\\' field.\\nFormat your output in xml format, such as xxx and xxx.\\n\"\"\"\\n```', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None, 'tools': None}\n", "\u001b[32m2026-01-05 09:47:53.961\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.087 | Total tokens: 479621 | Current cost: $0.001 | Current tokens: 5209\u001b[0m\n", "\u001b[32m2026-01-05 09:47:54.532\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.087 | Total tokens: 479718 | Current cost: $0.000 | Current tokens: 97\u001b[0m\n", "\u001b[32m2026-01-05 09:47:56.386\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.088 | Total tokens: 481282 | Current cost: $0.000 | Current tokens: 1564\u001b[0m\n", 
"{'name': 'error_handling7856', 'description': 'Task to error_handling7856. Takes validated_answer as input. Produces final_answer as output.', 'inputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Input parameter validated_answer for error_handling7856', 'required': False}], 'outputs': [{'name': 'final_answer', 'type': 'str', 'description': 'Output parameter final_answer from error_handling7856', 'required': True}], 'prompt': 'Your are a task solver.', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None, 'tools': None}\n", "\u001b[32m2026-01-05 09:47:57.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.089 | Total tokens: 486487 | Current cost: $0.001 | Current tokens: 5205\u001b[0m\n", "\u001b[32m2026-01-05 09:47:58.498\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.089 | Total tokens: 486591 | Current cost: $0.000 | Current tokens: 104\u001b[0m\n", "\u001b[32m2026-01-05 09:47:59.881\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.089 | Total tokens: 487184 | Current cost: $0.000 | Current tokens: 593\u001b[0m\n", "\u001b[32m2026-01-05 09:47:59.882\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer']\u001b[0m\n", "\u001b[32m2026-01-05 09:47:59.883\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1054\u001b[0m - \u001b[1mEvaluate the workflow at step 9 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:00<00:42, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:01<00:42, 1.14it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 [00:02<00:40, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:03<00:38, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:04<00:38, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:05<00:42, 1.04it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:07<00:50, 1.17s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:08<00:46, 1.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:08<00:41, 1.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:09<00:40, 1.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:10<00:35, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:11<00:34, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:12<00:33, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:13<00:30, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:14<00:31, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:14<00:30, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:15<00:28, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:16<00:26, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:17<00:24, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:17<00:22, 1.31it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:18<00:22, 1.29it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:19<00:20, 1.33it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:20<00:20, 1.33it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:21<00:26, 1.00s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:22<00:23, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:23<00:20, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:23<00:19, 1.18it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:24<00:18, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:25<00:16, 1.25it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:26<00:15, 1.31it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:26<00:14, 1.31it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:27<00:13, 1.37it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": 
"stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:28<00:13, 1.25it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:29<00:12, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:30<00:11, 1.26it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:30<00:11, 1.26it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 37/50 [00:31<00:10, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:32<00:08, 1.35it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:33<00:08, 1.35it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:33<00:07, 1.29it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 
41/50 [00:34<00:06, 1.29it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:35<00:06, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:36<00:05, 1.31it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:37<00:04, 1.24it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:37<00:04, 1.24it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:38<00:03, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:39<00:02, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:40<00:01, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 49/50 [00:41<00:00, 1.24it/s]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:41<00:00, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:48:41.876\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1057\u001b[0m - \u001b[1mStep 9 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.78}\u001b[0m\n", "randomly update dataset\n", "\u001b[32m2026-01-05 09:48:41.878\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-05 09:48:43.676\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.095 | Total tokens: 523043 | Current cost: $0.001 | Current tokens: 5150\u001b[0m\n", "- The workflow lacks a validation step after generating the answer, which could help ensure the correctness of the predictions before finalizing the output.\n", "- There is an inconsistency in the execution history where multiple questions resulted in incorrect solutions despite having a high prediction score, indicating potential flaws in the underlying computation or logic used to derive answers.\n", "- The workflow does not account for the possibility of ambiguous or unclear questions, which may lead to misinterpretation and incorrect predictions.\n", "- The control flow does not include mechanisms to handle cases where the predictions and solutions 
diverge, leading to unaddressed errors in the final output.\n", "- The prompt structure does not specify how to handle situations where the expression of the perturbed gene is measured against itself, which could lead to misleading conclusions.\n", "\u001b[32m2026-01-05 09:48:44.470\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.095 | Total tokens: 523678 | Current cost: $0.000 | Current tokens: 635\u001b[0m\n", "```python\n", "steps = [\n", "{'name': 'generate_answer', 'args': ['question'], 'outputs': ['answer']},\n", "{'name': 'validate_predictions7073', 'args': ['answer'], 'outputs': ['validated_answer']}\n", "]\n", "```\n", "\u001b[32m2026-01-05 09:48:44.471\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n", "Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb COG6 and examine the expression of NFIC. Does perturbing COG6 lead to a significant change in NFIC expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to ZCRB1 and then measure expression of PPHLN1. Does this perturbation cause a significant change in PPHLN1 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. 
In K562 cells, we perturb RTRAF and monitor HLA-C expression. Decide whether this perturbation leads to a significant alteration in HLA-C expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, MRPL32 is perturbed and MT-ATP8 expression is quantified. Does this perturbation result in a significant change in MT-ATP8 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, MRPL49 is perturbed and C1QL1 expression is measured. Determine whether C1QL1 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of FBXW7, does the expression profile of TMEM14C indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of GRSF1 is associated with a significant change in GRSF1 expression compared with unperturbed controls. 
Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, GSPT1 is perturbed and DDIT4 expression is quantified. Does this perturbation result in a significant change in DDIT4 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, UBL5 is perturbed and the expression of UBL5 is measured. Does this perturbation cause a significant change in UBL5 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: No\n", "Solutions: Yes\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, GPN3 is perturbed and SIX1 expression is quantified. Does this perturbation result in a significant change in SIX1 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, RPL28 is perturbed and the expression of RPL28 is measured. Does this perturbation cause a significant change in RPL28 expression? 
Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: No\n", "Solutions: Yes\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, CDC42 is perturbed and KIF15 expression is measured. Determine whether KIF15 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb DCTN2 and examine the expression of CKS1B. Does perturbing DCTN2 lead to a significant change in CKS1B expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, BCR is perturbed and the expression of WDR60 is measured. Does this perturbation cause a significant change in WDR60 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, C1orf131 is perturbed and KNL1 expression is measured. Determine whether KNL1 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. 
For experiments carried out in K562 cells, we perturb GPS1 and examine the expression of MDM2. Does perturbing GPS1 lead to a significant change in MDM2 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene expression analysis. For K562 cells, assess whether perturbation of OIP5 is associated with a significant change in NUSAP1 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb GMPS and examine the expression of C11orf74. Does perturbing GMPS lead to a significant change in C11orf74 expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, CRKL is perturbed and the expression of NAT9 is measured. Does this perturbation cause a significant change in NAT9 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, C1orf109 is perturbed and the expression of C1orf109 is measured. Determine whether C1orf109 shows a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, SEH1L is perturbed and DNAJC9 expression is measured. Determine whether DNAJC9 exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, BAG6 is perturbed and the expression of DBI is measured. Determine whether DBI shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, ARMC6 is perturbed and ARMC6 expression is quantified. Does this perturbation result in a significant change in ARMC6 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which H2AFX is perturbed and H2AFX expression is observed. Does this perturbation lead to a significant difference in H2AFX expression relative to control conditions? 
Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, PHF5A is perturbed and the expression of IMPA2 is measured. Determine whether IMPA2 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in gene regulation studies. For experiments performed in K562 cells, PSMD2 is perturbed and CENPW expression is measured. Determine whether CENPW exhibits a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, MRPL46 is perturbed and the expression of VAMP5 is measured. Does this perturbation cause a significant change in VAMP5 expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DHX36 is perturbed and DHX36 expression is quantified. Does this perturbation result in a significant change in DHX36 expression compared with control cells? 
Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, SON is perturbed and DNAJB9 expression is quantified. Does this perturbation result in a significant change in DNAJB9 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of IGBP1, does the expression profile of SERPINB2 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a domain expert in functional genomics. For experiments carried out in K562 cells, we perturb CHMP2A and examine the expression of HMMR. Does perturbing CHMP2A lead to a significant change in HMMR expression? Reply only with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert analyst of perturbation datasets. For K562 cells subjected to perturbation of ZCRB1, does the expression profile of ZCRB1 indicate a significant change relative to control conditions? Reply strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. 
In K562 cells, PES1 is perturbed and the expression of AMTN is measured. Does this perturbation cause a significant change in AMTN expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, PSMC6 is perturbed and the expression of POLR2D is measured. Does this perturbation cause a significant change in POLR2D expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, DHX15 is perturbed and RPL22L1 expression is quantified. Does this perturbation result in a significant change in RPL22L1 expression compared with control cells? Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to SMC5 and then measure expression of ERCC6L. Does this perturbation cause a significant change in ERCC6L expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb POLR2A and monitor RAB31 expression. Decide whether this perturbation leads to a significant alteration in RAB31 expression. 
Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to GTF2F2 and then measure expression of PLIN2. Does this perturbation cause a significant change in PLIN2 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, LYRM4 is perturbed and the expression of NMU is measured. Does this perturbation cause a significant change in NMU expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, SEM1 is perturbed and the expression of BIRC5 is measured. Determine whether BIRC5 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, RPS19 is perturbed and the expression of RPS19 is measured. Determine whether RPS19 shows a significant expression change under this perturbation. 
Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, NBAS is perturbed and the expression of SCD is measured. Does this perturbation cause a significant change in SCD expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to DDX51 and then measure expression of ZBTB25. Does this perturbation cause a significant change in ZBTB25 expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in regulatory genomics. Consider data from K562 cells in which UQCRQ is perturbed and KRTAP2-3 expression is observed. Does this perturbation lead to a significant difference in KRTAP2-3 expression relative to control conditions? Answer strictly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are assisting with the interpretation of perturbation-based expression data. In K562 cells, CDC45 is perturbed and SHMT1 expression is quantified. Does this perturbation result in a significant change in SHMT1 expression compared with control cells? 
Respond exactly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are a specialist in gene perturbation experiments. In K562 cells, we introduce a perturbation to CHTOP and then measure expression of CHTOP. Does this perturbation cause a significant change in CHTOP expression? Reply only in the form 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are an expert in single-cell biology and functional genomics. In K562 cells, TRMT10C is perturbed and the expression of TRMT10C is measured. Does this perturbation cause a significant change in TRMT10C expression? Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a genomics expert evaluating perturbation experiments. In K562 cells, MCL1 is perturbed and the expression of GSTM3 is measured. Determine whether GSTM3 shows a significant expression change under this perturbation. Respond exactly with 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.Questions: Question: You are a functional genomics specialist. In K562 cells, we perturb PSMC3 and monitor CEP128 expression. Decide whether this perturbation leads to a significant alteration in CEP128 expression. Answer only in the format 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: No\n", "Score: 0.0\n", "Error reason: Computation result is incorrect.Questions: Question: You are an expert in gene expression analysis. 
For K562 cells, assess whether perturbation of ANAPC10 is associated with a significant change in RBL1 expression compared with unperturbed controls. Answer strictly as 'Final Answer: Yes' or 'Final Answer: No'.\n", "\n", "Answer:\n", "Predictions: Final Answer: Yes\n", "Solutions: Yes\n", "Score: 1.0\n", "The solution is correct.\n", "{'name': 'generate_answer', 'description': 'Extract and formulate an answer from the given context.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'The question that needs to be answered.', 'required': True}], 'outputs': [{'name': 'answer', 'type': 'str', 'description': 'The direct answer to the question.', 'required': True}], 'prompt': '\"\"\"\\nUse the context provided in `{question}` to determine the best answer. Ensure that your response is clear and concise, directly addressing the question without any additional commentary or reasoning. If the answer is uncertain or ambiguous, indicate this clearly. Validate your answer against known correct solutions before finalizing it, documenting any discrepancies with the ground truth.\\n\"\"\"', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': 'Use the context to determine the best answer to the question. 
Provide your final answer in a clear format, without extra commentary or reasoning.'}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None, 'tools': None}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[32m2026-01-05 09:48:45.855\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.096 | Total tokens: 528884 | Current cost: $0.001 | Current tokens: 5206\u001b[0m\n", "\u001b[32m2026-01-05 09:48:46.362\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.096 | Total tokens: 528980 | Current cost: $0.000 | Current tokens: 96\u001b[0m\n", "\u001b[32m2026-01-05 09:48:47.300\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.096 | Total tokens: 530061 | Current cost: $0.000 | Current tokens: 1081\u001b[0m\n", "{'name': 'validate_predictions7073', 'description': 'Task to validate_predictions7073. Takes question as input. Produces validated_answer as output.', 'inputs': [{'name': 'question', 'type': 'str', 'description': 'Input parameter question for validate_predictions7073', 'required': False}], 'outputs': [{'name': 'validated_answer', 'type': 'str', 'description': 'Output parameter validated_answer from validate_predictions7073', 'required': True}], 'prompt': '```xml\\nINSTRUCTION for the 2-th task:\\n\"\"\"\\nTo validate the predictions, first assess the provided {question} against established ground-truth solutions. Determine if the predictions reflect significant changes based on explicit criteria, such as a predefined threshold for accuracy or relevance. 
If discrepancies arise between the predictions and ground-truth, clarify the nature of these discrepancies and their implications for the assessment. Document your reasoning in the \\'thought\\' field, explaining how the predictions align or diverge from the expected outcomes, and whether the perturbation led to significant changes or had no effect. Provide the validated answer in the \\'answer\\' field, ensuring clarity and conciseness. If the answer is uncertain or ambiguous, indicate this clearly in the \\'answer\\' field.\\nFormat your output in xml format, such as xxx and xxx.\\n\"\"\"\\n```', 'prompt_template': {'class_name': 'StringTemplate', 'instruction': \"Think step by step to answer the question based on the question context. You should integrate context for answering. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"}, 'system_prompt': 'You are a helpful and highly intelligent assistant.', 'parse_mode': 'xml', 'parse_func': None, 'parse_title': None, 'tool_names': None, 'tools': None}\n", "\u001b[32m2026-01-05 09:48:49.469\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.097 | Total tokens: 535338 | Current cost: $0.001 | Current tokens: 5277\u001b[0m\n", "\u001b[32m2026-01-05 09:48:50.143\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.097 | Total tokens: 535434 | Current cost: $0.000 | Current tokens: 96\u001b[0m\n", "\u001b[32m2026-01-05 09:48:51.873\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.models.model_utils\u001b[0m:\u001b[36mupdate_cost\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mTotal cost: $0.098 | Total tokens: 537160 | Current cost: $0.000 | Current tokens: 1726\u001b[0m\n", 
"\u001b[32m2026-01-05 09:48:51.874\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n", "\u001b[32m2026-01-05 09:48:51.874\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1054\u001b[0m - \u001b[1mEvaluate the workflow at step 10 ...\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 2%|▏ | 1/50 [00:00<00:35, 1.37it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 2/50 [00:01<00:36, 1.33it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 3/50 [00:02<00:34, 1.34it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 4/50 [00:03<00:39, 1.17it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 5/50 [00:11<02:33, 3.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 6/50 [00:11<01:49, 2.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 7/50 [00:12<01:23, 1.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 8/50 [00:14<01:21, 1.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 9/50 [00:15<01:03, 1.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 10/50 [00:16<00:52, 1.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 11/50 [00:16<00:43, 1.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 12/50 [00:17<00:38, 1.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 13/50 [00:18<00:34, 1.08it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 14/50 [00:18<00:30, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 15/50 [00:19<00:29, 1.17it/s]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 16/50 [00:20<00:30, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 17/50 [00:21<00:27, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 18/50 [00:22<00:25, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 19/50 [00:24<00:43, 1.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 20/50 [00:25<00:35, 1.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 21/50 [00:26<00:31, 1.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 22/50 [00:27<00:28, 1.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 23/50 [00:28<00:26, 1.03it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 48%|████▊ | 24/50 [00:29<00:23, 1.11it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 25/50 [00:29<00:22, 1.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 26/50 [00:30<00:19, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 27/50 [00:31<00:18, 1.25it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 28/50 [00:32<00:17, 1.28it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 29/50 [00:32<00:16, 1.30it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 30/50 [00:33<00:15, 1.31it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 31/50 [00:34<00:16, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 32/50 [00:35<00:14, 1.20it/s]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 33/50 [00:36<00:13, 1.29it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 34/50 [00:36<00:12, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 35/50 [00:37<00:11, 1.27it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 36/50 [00:38<00:11, 1.25it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 37/50 [00:39<00:10, 1.20it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 38/50 [00:40<00:11, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 39/50 [00:41<00:10, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 40/50 [00:42<00:09, 1.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" 
] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 41/50 [00:43<00:07, 1.15it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 42/50 [00:43<00:06, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 43/50 [00:44<00:05, 1.28it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 44/50 [00:45<00:05, 1.19it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 45/50 [00:46<00:04, 1.22it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 46/50 [00:47<00:03, 1.23it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 47/50 [00:47<00:02, 1.28it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 48/50 [00:48<00:01, 1.16it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating 
workflow: 98%|█████████▊| 49/50 [00:49<00:00, 1.07it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 50/50 [00:50<00:00, 1.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:49:42.670\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1057\u001b[0m - \u001b[1mStep 10 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.88}\u001b[0m\n", "randomly update dataset\n", "\u001b[32m2026-01-05 09:49:42.672\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1064\u001b[0m - \u001b[1mReach the maximum number of steps 10. Stop the optimization.\u001b[0m\n", "\u001b[32m2026-01-05 09:49:42.672\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m1067\u001b[0m - \u001b[1mRestore the best graph from the snapshot ...\u001b[0m\n", "\u001b[32m2026-01-05 09:49:42.672\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n", "\u001b[32m2026-01-05 09:49:42.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36mrestore_best_graph\u001b[0m:\u001b[36m1216\u001b[0m - \u001b[1mRestore the best graph from snapshot with metrics {'f1': 0.0, 'em': 0.0, 'acc': 0.88} ...\u001b[0m\n", "\u001b[32m2026-01-05 09:49:42.674\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n", "\u001b[32m2026-01-05 09:49:42.674\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.qastructure_optimizer\u001b[0m:\u001b[36mrestore_best_graph\u001b[0m:\u001b[36m1216\u001b[0m - \u001b[1mRestore the best graph from snapshot with metrics {'f1': 0.0, 'em': 0.0, 'acc': 0.88} ...\u001b[0m\n", "\u001b[32m2026-01-05 09:49:42.674\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36msave_module\u001b[0m:\u001b[36m1204\u001b[0m - \u001b[1mSaving SequentialWorkFlowGraph to ./debug/save_10_noreason_reploge.json\u001b[0m\n", "\u001b[32m2026-01-05 09:49:42.679\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.agents.customize_agent\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m92\u001b[0m - \u001b[33m\u001b[1mBoth `prompt` and `prompt_template` are provided in `CustomizeAgent`. 
`prompt_template` will be used.\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n", "Evaluating workflow: 0%| | 0/3000 [00:00\n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637335.548572591)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637336.423991661)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637338.130238772)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637339.594542294)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637340.38089388)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637341.142935979)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637341.815225513)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637333.971553579)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637342.557382733)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637334.710684254)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637343.330939196)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637344.101844681)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637345.229101465)])']\n", "connector: \n", "Unclosed 
client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637346.987328824)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637347.657788183)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637348.350473113)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637350.060637552)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637351.035777079)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637351.836289513)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637352.517354208)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637353.342832627)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637354.064431871)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637354.841911025)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637357.008634454)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637356.057691038)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637346.035667837)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637349.231339033)])']\n", "connector: \n", "Unclosed client session\n", 
"client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637358.851610923)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1637359.533751443)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637360.970993336)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637362.058262743)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637362.751166216)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637363.530609437)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637364.444697833)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637365.14607843)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637365.839796239)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637366.536662461)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637367.980788362)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637367.235283116)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637357.973907986)])']\n", 
"connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637360.30894189)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637368.764620519)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637369.813577105)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637371.426910193)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637372.115959205)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637372.965391169)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637374.70895884)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637375.466527603)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637401.478241751)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637370.700399448)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637373.688980347)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637402.212080245)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637402.964697844)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637404.619609912)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1637405.213766409)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637405.935514568)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637407.451095727)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637408.405659492)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637409.217783322)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637410.640951964)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637411.314250922)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637412.085380435)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637412.865319039)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637403.801033462)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637406.672451705)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637410.00995937)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637413.857406637)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637414.583960651)])']\n", "connector: \n", "Unclosed 
client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637415.221821631)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637415.982680873)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637416.956623721)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637417.709516435)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637418.843689666)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637419.698019737)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637421.094933097)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637421.903185702)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637422.81661322)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637424.673327672)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637425.487661424)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637420.395563513)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637423.710658031)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 
1637426.186923987)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637427.184112608)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637428.275028947)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637429.10923222)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637429.888607282)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637430.895672435)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637431.912967655)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637432.66339347)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637433.816948146)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637434.543284779)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637436.057449222)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637437.009510197)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637437.97012716)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637439.610565461)])']\n", "connector: \n", "Unclosed 
client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637435.259427991)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637438.70324527)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637440.315919705)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637441.179049204)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637466.777402139)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637467.621531281)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637468.448625578)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637469.475303925)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637470.165170673)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637472.566837598)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637473.426157935)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637442.90277325)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637471.455460081)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 
1637474.114955069)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637474.879217153)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637475.686317093)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637476.363122396)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637477.121215051)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637479.55947269)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637480.301407366)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637481.403954857)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637482.35260259)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637483.089404618)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637484.044080198)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637484.940036608)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637485.716832715)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637487.214335857)])']\n", "connector: \n", "Unclosed 
client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637486.453757316)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637478.745676542)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637487.977764412)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637488.622172776)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637490.159410336)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637490.929920813)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637491.815305979)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637493.216775197)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637494.005708304)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637494.852460279)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637495.573864242)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637496.405088776)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637497.264769656)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1637498.057234732)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637498.802104272)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637500.089831535)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637489.325102994)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637492.505298812)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637501.269105387)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637502.380203512)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637503.076973514)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637504.545223126)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637505.386869055)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637506.17267057)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637507.114979611)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637507.826790637)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637508.623123665)])']\n", "connector: \n", "Unclosed 
client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637520.79408783)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637521.638367352)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637523.218360667)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637503.78565269)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637522.445506866)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637524.052525044)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637524.800137779)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637526.246930802)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637527.318249166)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637528.158396453)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637530.100840751)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637530.980735054)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637531.797575058)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1637532.648449209)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637533.722693058)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637534.551629661)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637535.494462558)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637537.670590041)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637536.726460911)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637525.569960338)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1637528.898864796)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637539.192269085)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637539.970371631)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637541.465448074)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637542.286632858)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637542.966532776)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637543.863856173)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed 
connector\n", "connections: ['deque([(, 1637544.627958658)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637545.36453982)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637546.101803149)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637546.952418126)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637549.116630373)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637548.441582397)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637538.471851481)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637540.807665446)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637549.792806711)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637550.595104794)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637552.555792645)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637553.40982304)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637554.624640389)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637556.404357943)])']\n", "connector: \n", "Unclosed client session\n", 
"client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637557.100958773)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637557.890481065)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637558.749432331)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637559.545652211)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637560.438735595)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637561.202390059)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637562.179368463)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1637563.820264602)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637551.557196807)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637555.546855542)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637575.151882756)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637575.860544926)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637576.521397237)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", 
"connections: ['deque([(, 1637577.211466737)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637578.201378976)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637579.841471462)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637580.657644146)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637581.431875072)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637583.436629147)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637584.368375785)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637585.36479301)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637586.103479327)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637579.086346829)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637582.316195634)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637586.895120086)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637587.667052754)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637588.359806252)])']\n", "connector: \n", "Unclosed client session\n", "client_session: 
\n", "Unclosed connector\n", "connections: ['deque([(, 1637589.094997626)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637589.837802676)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637590.64402018)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637591.392697744)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637592.160351673)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637593.5704045)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637594.395527243)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637595.127589829)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1637597.245183379)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637597.94730253)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637592.807120064)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637596.142555254)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637598.804231806)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1637599.787358698)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637600.59523675)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637601.396500013)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637602.32430985)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637603.048433295)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637603.783911958)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637604.547150511)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637605.302892571)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637606.094786084)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637607.69858348)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637608.453927904)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637609.214744989)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637610.620278144)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637606.877732345)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client 
session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637609.909829154)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637611.362042322)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637612.045353289)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637613.714818552)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637614.475956207)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637626.466240692)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637629.572364188)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637630.241606577)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637631.907526682)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637612.957966997)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637627.237146445)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637627.912395191)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637628.640812082)])']\n", "connector: \n", "Unclosed 
connector\n", "connections: ['deque([(, 1637631.146821886)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637632.694086271)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637633.331453256)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637634.924980586)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637635.711537246)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637636.540868845)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637638.118200923)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637638.968666446)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637639.650396408)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637640.486961748)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637641.33764482)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637642.601168337)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637643.526472804)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637644.35325786)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", 
"connections: ['deque([(, 1637645.387673141)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637634.083311093)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637637.281859762)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637646.47357345)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637647.262260861)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637648.067959869)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637649.815813806)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637650.573540249)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637651.302316011)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637652.27549213)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1637653.346944981)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637654.031821079)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637654.712699845)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637655.332970606)])']\n", 
"connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637657.056926676)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637658.034158145)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637659.479551007)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637656.026106283)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637649.154888126)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637658.84737624)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637661.483825942)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637662.268308136)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637663.938174412)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637664.859702688)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637665.600421088)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637666.348145178)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637667.3856962)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637679.098544693)])']\n", "connector: \n", 
"Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637680.169575665)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637681.094237256)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637660.344733016)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637663.222053076)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637682.741437852)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637683.406903867)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637684.859043362)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637685.545010091)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637686.247112139)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637688.022800381)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637688.865241227)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637689.518809877)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637690.362230818)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", 
"Unclosed connector\n", "connections: ['deque([(, 1637691.010321054)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637692.041587919)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637693.012579092)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637693.957858685)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637694.691395537)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637684.067542788)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637687.269037999)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637695.740071423)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637696.814331961)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637697.491504063)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637699.156339071)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637699.955552577)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637700.789634886)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637701.562928592)])']\n", "connector: \n", "Unclosed client 
session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637702.179385372)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637703.028930165)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637704.102610739)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637704.877609389)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637706.219045343)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637706.95287384)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637708.375531379)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637705.549301432)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637698.344337936)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637707.686617787)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637710.161499354)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637710.97969526)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1637712.701320318)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1637713.464450607)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637714.219345341)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637714.900557098)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637715.757929782)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637716.49129441)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637717.28988283)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637718.221704877)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637719.816235827)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637719.038686917)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637709.113798831)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637711.808936599)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637730.807217143)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637731.514968518)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637732.27934406)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed 
connector\n", "connections: ['deque([(, 1637733.017450691)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637733.721347833)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637734.642568959)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637735.621940701)])']\n", "connector: \n", "Evaluating workflow: 0%| | 7/3000 [00:32<3:58:43, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:50:15.481\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 0%| | 8/3000 [00:37<3:59:12, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 09:50:20.302\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 0%| | 9/3000 [00:43<4:08:14, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:50:25.684\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 0%| | 10/3000 [00:47<3:59:48, 4.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 09:50:30.121\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 0%| | 11/3000 [00:52<3:56:13, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 09:50:34.703\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 0%| | 12/3000 [00:56<3:51:02, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 09:50:39.109\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", 
"Evaluating workflow: 0%| | 13/3000 [01:00<3:48:48, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 09:50:43.605\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 0%| | 14/3000 [01:05<3:47:48, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:50:48.139\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 0%| | 15/3000 [01:10<3:50:28, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:50:52.900\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 16/3000 [01:15<4:01:14, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:50:58.257\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 17/3000 [01:19<3:53:54, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:51:02.622\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 18/3000 [01:24<3:48:18, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:51:06.957\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 19/3000 [01:28<3:42:52, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:51:11.192\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", 
"Evaluating workflow: 1%| | 20/3000 [01:33<3:48:10, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:51:16.039\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 21/3000 [01:39<4:04:15, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:51:21.717\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 22/3000 [01:43<4:01:10, 4.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:51:26.435\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 23/3000 [01:48<3:52:43, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:51:30.732\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 24/3000 [01:53<4:01:13, 4.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:51:35.999\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 25/3000 [01:58<4:02:42, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 09:51:40.968\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 26/3000 [02:03<4:11:04, 5.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:51:46.431\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", 
"Evaluating workflow: 1%| | 27/3000 [02:07<3:56:35, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:51:50.528\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 28/3000 [02:12<3:47:34, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 09:51:54.701\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 29/3000 [02:19<4:27:10, 5.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:52:01.966\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 30/3000 [02:23<4:12:56, 5.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:52:06.410\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 31/3000 [02:28<4:00:51, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:52:10.711\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 32/3000 [02:32<3:54:25, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:52:15.151\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 33/3000 [02:37<4:05:27, 4.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:52:20.639\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", 
"Evaluating workflow: 1%| | 34/3000 [02:42<3:55:33, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:52:24.941\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 35/3000 [02:46<3:48:11, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:52:29.214\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 36/3000 [02:51<3:49:46, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:52:33.945\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%| | 37/3000 [02:56<3:54:22, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:52:38.912\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%|▏ | 38/3000 [03:01<4:04:35, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:52:44.352\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%|▏ | 39/3000 [03:06<3:57:40, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:52:48.845\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%|▏ | 40/3000 [03:11<4:10:09, 5.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:52:54.510\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 1%|▏ | 41/3000 [03:16<4:09:08, 5.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:52:59.518\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%|▏ | 42/3000 [03:21<4:03:37, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:53:04.203\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%|▏ | 43/3000 [03:41<7:48:22, 9.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:53:24.352\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 1%|▏ | 44/3000 [03:46<6:31:56, 7.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:53:28.694\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 45/3000 [03:50<5:41:23, 6.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:53:33.238\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 46/3000 [03:55<5:10:05, 6.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:53:38.058\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 47/3000 [04:00<4:49:54, 5.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:53:42.997\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 2%|▏ | 48/3000 [04:04<4:23:58, 5.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:53:47.137\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 49/3000 [04:08<4:10:31, 5.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:53:51.596\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 50/3000 [04:13<4:02:23, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:53:56.144\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 51/3000 [04:18<4:00:55, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:54:00.981\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 52/3000 [04:23<3:58:48, 4.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:54:05.745\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 53/3000 [04:28<4:00:55, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:54:10.754\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 54/3000 [04:32<3:58:22, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:54:15.491\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 2%|▏ | 55/3000 [04:37<3:50:34, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:54:19.822\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 56/3000 [04:44<4:27:55, 5.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:54:27.062\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 57/3000 [04:48<4:15:27, 5.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:54:31.682\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 58/3000 [04:54<4:17:12, 5.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 09:54:37.015\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 59/3000 [04:58<4:04:39, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:54:41.413\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 60/3000 [05:03<3:57:14, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:54:45.905\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 61/3000 [05:08<4:00:39, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:54:50.984\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 2%|▏ | 62/3000 [05:12<3:52:23, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:54:55.341\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 63/3000 [05:17<3:53:31, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:55:00.169\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 64/3000 [05:21<3:47:09, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 09:55:04.511\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 65/3000 [05:26<3:41:19, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:55:08.762\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 66/3000 [05:29<3:32:23, 4.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 09:55:12.682\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 67/3000 [05:34<3:40:57, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:55:17.615\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 68/3000 [05:39<3:38:07, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:55:21.946\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 2%|▏ | 69/3000 [05:43<3:40:54, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:55:26.605\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 70/3000 [05:48<3:35:41, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 09:55:30.777\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 71/3000 [05:53<3:47:15, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:55:35.988\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 72/3000 [05:57<3:46:59, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 09:55:40.630\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 73/3000 [06:02<3:49:38, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:55:45.468\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▏ | 74/3000 [06:07<3:47:23, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:55:50.028\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 2%|▎ | 75/3000 [06:12<3:51:21, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:55:54.967\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 3%|▎ | 76/3000 [06:16<3:41:29, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:55:59.043\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 77/3000 [06:21<3:51:38, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:56:04.288\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 78/3000 [06:27<4:07:24, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:56:10.127\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 79/3000 [06:31<3:49:33, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 09:56:13.991\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 80/3000 [06:36<3:53:46, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:56:19.001\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 81/3000 [06:42<4:11:46, 5.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 09:56:25.043\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 82/3000 [06:46<4:01:39, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:56:29.531\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 3%|▎ | 83/3000 [06:51<3:59:28, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:56:34.356\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 84/3000 [06:55<3:46:10, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:56:38.376\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 85/3000 [07:01<3:57:44, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:56:43.828\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 86/3000 [07:06<4:01:48, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:56:49.007\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 87/3000 [07:11<3:58:34, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:56:53.769\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 88/3000 [07:15<3:52:16, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 09:56:58.256\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 89/3000 [07:20<3:49:17, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:57:02.842\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 3%|▎ | 90/3000 [07:25<3:58:43, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 09:57:08.221\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 91/3000 [07:30<3:54:24, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:57:12.853\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 92/3000 [07:35<3:55:24, 4.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:57:17.762\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 93/3000 [07:39<3:50:46, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:57:22.306\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 94/3000 [07:43<3:42:38, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 09:57:26.515\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 95/3000 [07:47<3:33:47, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:57:30.507\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 96/3000 [07:52<3:36:10, 4.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:57:35.092\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 3%|▎ | 97/3000 [07:57<3:41:12, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:57:39.911\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 98/3000 [08:01<3:35:23, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:57:44.087\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 99/3000 [08:05<3:33:47, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 09:57:48.435\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 100/3000 [08:09<3:27:35, 4.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 09:57:52.434\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 101/3000 [08:14<3:37:14, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:57:57.400\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 102/3000 [08:19<3:47:34, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:58:02.614\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 3%|▎ | 103/3000 [08:24<3:39:48, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:58:06.795\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 3%|▎ | 104/3000 [08:28<3:35:50, 4.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:58:11.079\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▎ | 105/3000 [08:32<3:32:36, 4.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:58:15.332\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▎ | 106/3000 [08:36<3:30:34, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 09:58:19.604\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▎ | 107/3000 [08:41<3:32:08, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:58:24.083\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▎ | 108/3000 [08:45<3:34:10, 4.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:58:28.628\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▎ | 109/3000 [08:50<3:33:17, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:58:33.016\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▎ | 110/3000 [08:55<3:40:06, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:58:37.919\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 4%|▎ | 111/3000 [08:59<3:33:51, 4.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:58:42.062\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▎ | 112/3000 [09:04<3:38:23, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 09:58:46.822\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 113/3000 [09:09<3:48:14, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:58:52.047\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 114/3000 [09:13<3:44:30, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:58:56.537\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 115/3000 [09:18<3:40:13, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:59:00.914\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 116/3000 [09:22<3:40:25, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:59:05.512\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 117/3000 [09:27<3:44:04, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:59:10.357\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 4%|▍ | 118/3000 [09:32<3:43:07, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:59:14.960\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 119/3000 [09:36<3:37:25, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:59:19.215\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 120/3000 [09:40<3:32:03, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:59:23.375\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 121/3000 [09:45<3:34:04, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:59:27.939\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 122/3000 [09:49<3:34:21, 4.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:59:32.425\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 123/3000 [09:54<3:32:56, 4.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:59:36.800\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 124/3000 [09:59<3:43:54, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 09:59:42.009\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 4%|▍ | 125/3000 [10:04<3:45:56, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 09:59:46.827\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 126/3000 [10:10<4:03:16, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 09:59:52.754\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 127/3000 [10:14<3:55:50, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 09:59:57.321\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 128/3000 [10:18<3:45:30, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:00:01.533\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 129/3000 [10:23<3:37:49, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:00:05.714\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 130/3000 [10:27<3:40:24, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:00:10.452\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 131/3000 [10:32<3:46:47, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:00:15.510\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 4%|▍ | 132/3000 [10:37<3:47:50, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:00:20.331\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 133/3000 [10:43<3:59:38, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:00:25.926\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 134/3000 [10:48<4:04:41, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:00:31.303\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 4%|▍ | 135/3000 [10:53<3:56:20, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:00:35.846\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▍ | 136/3000 [10:57<3:49:46, 4.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:00:40.343\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▍ | 137/3000 [11:03<4:03:06, 5.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:00:46.093\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▍ | 138/3000 [11:08<4:03:47, 5.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:00:51.242\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 5%|▍ | 139/3000 [11:14<4:19:53, 5.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:00:57.485\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▍ | 140/3000 [11:19<4:09:28, 5.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:01:02.213\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▍ | 141/3000 [11:24<4:01:08, 5.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:01:06.870\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▍ | 142/3000 [11:28<3:55:22, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:01:11.533\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▍ | 143/3000 [11:33<3:53:16, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:01:16.333\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▍ | 144/3000 [11:38<3:58:10, 5.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:01:21.581\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▍ | 145/3000 [11:42<3:44:18, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:01:25.619\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 5%|▍ | 146/3000 [11:47<3:42:49, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:01:30.235\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▍ | 147/3000 [11:52<3:44:59, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:01:35.076\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▍ | 148/3000 [11:57<3:45:38, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:01:39.859\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▍ | 149/3000 [12:02<3:50:02, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:01:44.921\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 150/3000 [12:06<3:41:01, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:01:49.135\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 151/3000 [13:40<24:56:14, 31.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:03:23.314\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 152/3000 [13:45<18:31:19, 23.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:03:27.830\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": 
[ "\r", "Evaluating workflow: 5%|▌ | 153/3000 [13:49<13:59:07, 17.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:03:32.149\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 154/3000 [13:53<10:50:20, 13.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:03:36.588\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 155/3000 [13:58<8:33:49, 10.84s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:03:40.717\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 156/3000 [14:03<7:11:53, 9.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:03:45.804\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 157/3000 [14:07<6:05:20, 7.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:03:50.245\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 158/3000 [14:11<5:14:23, 6.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:03:54.379\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 159/3000 [14:16<4:53:24, 6.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:03:59.547\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 5%|▌ | 160/3000 [14:22<4:48:01, 6.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:04:05.371\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 161/3000 [14:27<4:34:26, 5.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:04:10.507\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 162/3000 [14:34<4:40:13, 5.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:04:16.721\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 163/3000 [14:39<4:30:51, 5.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:04:21.993\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 5%|▌ | 164/3000 [14:43<4:12:01, 5.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:04:26.400\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 165/3000 [14:48<3:57:30, 5.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:04:30.714\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 166/3000 [14:53<4:02:41, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:04:36.112\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 6%|▌ | 167/3000 [14:58<4:00:48, 5.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:04:41.124\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 168/3000 [15:02<3:52:19, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:04:45.631\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 169/3000 [15:07<3:47:55, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:04:50.247\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 170/3000 [15:12<3:48:26, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:04:55.120\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 171/3000 [15:17<3:47:13, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:04:59.883\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 172/3000 [15:21<3:42:43, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:05:04.390\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 173/3000 [15:26<3:48:23, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:05:09.521\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 6%|▌ | 174/3000 [15:31<3:51:10, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:05:14.572\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 175/3000 [15:36<3:45:46, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:05:19.103\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 176/3000 [15:40<3:41:47, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:05:23.622\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 177/3000 [15:45<3:35:07, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:05:27.867\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 178/3000 [15:50<3:43:54, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:05:33.068\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 179/3000 [15:54<3:38:10, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:05:37.427\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 180/3000 [15:59<3:46:38, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:05:42.674\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 6%|▌ | 181/3000 [16:05<3:54:21, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:05:48.049\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 182/3000 [16:10<3:58:00, 5.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:05:53.302\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 183/3000 [16:15<3:53:51, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:05:58.081\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 184/3000 [16:20<3:50:00, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:06:02.795\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 185/3000 [16:25<3:54:41, 5.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:06:08.034\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 186/3000 [16:30<3:52:47, 4.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:06:12.907\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▌ | 187/3000 [16:35<3:56:06, 5.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:06:18.112\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 6%|▋ | 188/3000 [16:40<3:54:51, 5.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:06:23.065\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▋ | 189/3000 [16:46<4:08:50, 5.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:06:29.077\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▋ | 190/3000 [16:51<4:01:00, 5.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:06:33.837\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▋ | 191/3000 [16:55<3:50:30, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:06:38.242\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▋ | 192/3000 [17:00<3:48:01, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:06:42.995\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▋ | 193/3000 [17:06<4:03:57, 5.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:06:49.008\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 6%|▋ | 194/3000 [17:10<3:50:21, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:06:53.260\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 6%|▋ | 195/3000 [17:15<3:51:22, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:06:58.264\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 196/3000 [17:20<3:52:01, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:07:03.266\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 197/3000 [17:24<3:41:52, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:07:07.512\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 198/3000 [17:30<3:48:48, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:07:12.762\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 199/3000 [17:34<3:39:03, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:07:16.971\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 200/3000 [17:39<3:41:42, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:07:21.859\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 201/3000 [17:45<4:04:13, 5.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:07:28.224\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 7%|▋ | 202/3000 [17:54<5:01:04, 6.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:07:37.529\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 203/3000 [17:59<4:32:48, 5.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:07:41.972\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 204/3000 [18:04<4:21:33, 5.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:07:47.026\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 205/3000 [18:08<4:05:59, 5.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:07:51.532\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 206/3000 [18:13<3:55:25, 5.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:07:56.062\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 207/3000 [18:18<3:54:01, 5.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:08:01.024\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 208/3000 [18:23<3:53:34, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:08:06.025\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 7%|▋ | 209/3000 [18:27<3:47:37, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:08:10.624\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 210/3000 [18:32<3:39:24, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:08:14.934\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 211/3000 [18:36<3:37:12, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:08:19.500\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 212/3000 [18:42<3:47:58, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:08:24.952\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 213/3000 [18:48<4:02:50, 5.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:08:30.931\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 214/3000 [18:53<4:02:15, 5.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:08:36.122\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 215/3000 [18:58<3:53:32, 5.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:08:40.720\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 7%|▋ | 216/3000 [19:02<3:50:08, 4.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:08:45.513\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 217/3000 [19:08<4:01:51, 5.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:08:51.321\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 218/3000 [19:13<3:57:53, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:08:56.257\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 219/3000 [19:18<3:53:39, 5.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:09:01.089\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 220/3000 [19:23<3:48:42, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:09:05.780\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 221/3000 [19:27<3:45:23, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:09:10.484\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 222/3000 [19:32<3:41:12, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:09:15.054\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 7%|▋ | 223/3000 [19:36<3:31:19, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:09:19.127\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 7%|▋ | 224/3000 [19:41<3:31:28, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:09:23.709\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 225/3000 [19:45<3:28:53, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:09:28.098\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 226/3000 [19:50<3:37:33, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:09:33.245\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 227/3000 [19:56<3:51:24, 5.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:09:38.956\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 228/3000 [20:00<3:35:47, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:09:42.841\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 229/3000 [20:04<3:28:14, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:09:46.973\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 8%|▊ | 230/3000 [20:09<3:44:36, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:09:52.669\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 231/3000 [20:14<3:46:03, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:09:57.645\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 232/3000 [20:19<3:44:02, 4.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:10:02.403\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 233/3000 [20:24<3:37:37, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:10:06.802\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 234/3000 [20:30<4:01:23, 5.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:10:13.246\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 235/3000 [20:35<3:56:25, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:10:18.129\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 236/3000 [20:40<3:58:15, 5.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:10:23.398\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 8%|▊ | 237/3000 [20:45<3:55:57, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:10:28.409\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 238/3000 [20:50<3:53:08, 5.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:10:33.335\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 239/3000 [20:55<3:51:06, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:10:38.260\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 240/3000 [21:00<3:45:23, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:10:42.874\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 241/3000 [21:06<4:02:56, 5.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:10:49.051\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 242/3000 [21:11<3:55:02, 5.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:10:53.768\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 243/3000 [21:15<3:48:18, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:10:58.399\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 8%|▊ | 244/3000 [21:20<3:42:08, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:11:02.926\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 245/3000 [21:24<3:36:27, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:11:07.356\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 246/3000 [21:29<3:37:21, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:11:12.141\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 247/3000 [21:34<3:35:17, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:11:16.732\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 248/3000 [21:39<3:42:55, 4.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:11:21.984\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 249/3000 [21:44<3:53:52, 5.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:11:27.647\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 250/3000 [21:49<3:48:50, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:11:32.387\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 8%|▊ | 251/3000 [21:54<3:41:42, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:11:36.867\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 252/3000 [21:58<3:36:04, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:11:41.303\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 253/3000 [22:02<3:30:28, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:11:45.619\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 254/3000 [22:07<3:35:55, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:11:50.618\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 8%|▊ | 255/3000 [22:11<3:25:32, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:11:54.585\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▊ | 256/3000 [22:16<3:25:43, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:11:59.097\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▊ | 257/3000 [22:20<3:16:52, 4.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:12:02.955\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 9%|▊ | 258/3000 [22:24<3:13:57, 4.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:12:07.054\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▊ | 259/3000 [22:28<3:17:55, 4.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:12:11.593\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▊ | 260/3000 [22:33<3:18:31, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:12:15.974\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▊ | 261/3000 [22:38<3:25:46, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:12:20.856\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▊ | 262/3000 [22:42<3:26:32, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:12:25.425\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 263/3000 [22:47<3:33:44, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:12:30.482\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 264/3000 [22:51<3:25:29, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:12:34.571\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 9%|▉ | 265/3000 [22:56<3:30:04, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:12:39.418\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 266/3000 [23:01<3:30:59, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:12:44.099\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 267/3000 [23:05<3:24:46, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:12:48.280\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 268/3000 [23:10<3:30:00, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:12:53.164\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 269/3000 [23:14<3:22:27, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:12:57.229\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 270/3000 [23:18<3:20:22, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:13:01.530\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 271/3000 [23:23<3:24:23, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:13:06.233\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 9%|▉ | 272/3000 [23:28<3:26:13, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:13:10.867\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 273/3000 [23:32<3:27:20, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:13:15.490\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 274/3000 [23:38<3:38:07, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:13:20.849\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 275/3000 [23:42<3:27:38, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:13:24.887\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 276/3000 [23:46<3:24:59, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:13:29.270\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 277/3000 [23:50<3:18:12, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:13:33.293\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 278/3000 [23:54<3:16:40, 4.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:13:37.552\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 9%|▉ | 279/3000 [23:59<3:14:01, 4.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:13:41.698\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 280/3000 [24:03<3:17:44, 4.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:13:46.255\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 281/3000 [24:08<3:26:52, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:13:51.294\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 282/3000 [24:13<3:29:21, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:13:56.048\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 283/3000 [24:17<3:27:52, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:14:00.565\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 9%|▉ | 284/3000 [24:24<3:48:42, 5.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:14:06.696\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|▉ | 285/3000 [24:28<3:46:23, 5.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:14:11.584\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ 
"\r", "Evaluating workflow: 10%|▉ | 286/3000 [24:34<3:51:44, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:14:16.988\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|▉ | 287/3000 [24:38<3:43:43, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:14:21.526\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|▉ | 288/3000 [24:43<3:38:33, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:14:26.099\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|▉ | 289/3000 [24:47<3:33:25, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:14:30.562\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|▉ | 290/3000 [24:52<3:36:27, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:14:35.515\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|▉ | 291/3000 [24:57<3:36:46, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:14:40.337\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|▉ | 292/3000 [25:02<3:33:18, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:14:44.888\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": 
[ "\r", "Evaluating workflow: 10%|▉ | 293/3000 [25:06<3:31:41, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:14:49.501\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|▉ | 294/3000 [25:10<3:23:06, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:14:53.564\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|▉ | 295/3000 [25:15<3:22:07, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:14:58.001\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|▉ | 296/3000 [25:20<3:29:32, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:15:03.038\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|▉ | 297/3000 [25:24<3:28:57, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:15:07.651\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|▉ | 298/3000 [25:30<3:35:41, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:15:12.793\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|▉ | 299/3000 [25:34<3:34:13, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:15:17.480\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": 
[ "\r", "Evaluating workflow: 10%|█ | 300/3000 [25:39<3:28:31, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:15:21.823\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 301/3000 [25:44<3:32:18, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:15:26.743\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 302/3000 [25:48<3:26:49, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:15:31.062\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 303/3000 [25:53<3:28:34, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:15:35.797\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 304/3000 [25:57<3:27:39, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:15:40.374\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 305/3000 [26:02<3:33:42, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:15:45.451\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 306/3000 [26:07<3:33:31, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:15:50.200\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": 
[ "\r", "Evaluating workflow: 10%|█ | 307/3000 [26:12<3:34:27, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:15:55.031\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 308/3000 [26:20<4:19:53, 5.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:16:03.191\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 309/3000 [26:24<3:57:41, 5.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:16:07.341\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 310/3000 [26:30<4:01:46, 5.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:16:12.951\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 311/3000 [26:34<3:45:37, 5.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:16:17.149\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 312/3000 [26:39<3:42:07, 4.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:16:21.930\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 313/3000 [26:43<3:34:43, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:16:26.343\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": 
[ "\r", "Evaluating workflow: 10%|█ | 314/3000 [26:48<3:30:58, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:16:30.864\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 10%|█ | 315/3000 [26:53<3:35:57, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:16:35.954\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 316/3000 [26:57<3:32:11, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:16:40.505\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 317/3000 [27:03<3:39:10, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:16:45.774\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 318/3000 [27:07<3:35:19, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:16:50.395\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 319/3000 [27:12<3:39:15, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:16:55.511\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 320/3000 [27:17<3:39:15, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:17:00.424\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": 
[ "\r", "Evaluating workflow: 11%|█ | 321/3000 [27:22<3:33:56, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:17:04.943\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 322/3000 [27:27<3:34:19, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:17:09.769\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 323/3000 [27:31<3:32:14, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:17:14.422\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 324/3000 [27:36<3:28:42, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:17:18.920\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 325/3000 [27:40<3:29:11, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:17:23.642\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 326/3000 [27:45<3:33:30, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:17:28.663\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 327/3000 [27:50<3:31:56, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:17:33.342\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": 
[ "\r", "Evaluating workflow: 11%|█ | 328/3000 [27:55<3:31:49, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:17:38.097\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 329/3000 [27:59<3:28:08, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:17:42.584\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 330/3000 [28:04<3:30:05, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:17:47.412\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 331/3000 [28:09<3:28:35, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:17:52.026\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 332/3000 [28:13<3:25:39, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:17:56.502\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 333/3000 [28:18<3:26:34, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:18:01.200\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 334/3000 [28:22<3:20:21, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:18:05.388\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": 
[ "\r", "Evaluating workflow: 11%|█ | 335/3000 [28:27<3:17:50, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:18:09.714\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 336/3000 [28:30<3:10:57, 4.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:18:13.657\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█ | 337/3000 [28:36<3:28:13, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:18:19.260\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█▏ | 338/3000 [28:40<3:23:33, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:18:23.606\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█▏ | 339/3000 [28:45<3:18:22, 4.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:18:27.810\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█▏ | 340/3000 [28:49<3:14:16, 4.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:18:31.981\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█▏ | 341/3000 [28:54<3:21:12, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:18:36.890\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 11%|█▏ | 342/3000 [28:58<3:21:37, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:18:41.468\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█▏ | 343/3000 [29:03<3:22:28, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:18:46.089\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 11%|█▏ | 344/3000 [29:08<3:24:22, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:18:50.809\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 345/3000 [29:12<3:16:44, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:18:54.857\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 346/3000 [29:16<3:14:50, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:18:59.166\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 347/3000 [29:21<3:16:44, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:19:03.720\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 348/3000 [29:25<3:15:09, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:19:08.055\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 12%|█▏ | 349/3000 [29:29<3:13:08, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:19:12.323\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 350/3000 [29:34<3:18:20, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:19:17.093\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 351/3000 [29:38<3:19:15, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:19:21.659\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 352/3000 [29:43<3:22:25, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:19:26.417\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 353/3000 [29:47<3:18:02, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:19:30.678\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 354/3000 [29:52<3:21:04, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:19:35.403\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 355/3000 [29:57<3:28:16, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:19:40.512\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 12%|█▏ | 356/3000 [30:01<3:19:02, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:19:44.544\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 357/3000 [30:06<3:18:39, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:19:49.037\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 358/3000 [30:10<3:16:19, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:19:53.377\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 359/3000 [30:15<3:18:18, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:19:57.991\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 360/3000 [30:20<3:22:30, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:20:02.820\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 361/3000 [30:25<3:27:30, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:20:07.807\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 362/3000 [30:29<3:25:10, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:20:12.355\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 12%|█▏ | 363/3000 [30:34<3:22:41, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:20:16.838\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 364/3000 [30:38<3:22:47, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:20:21.464\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 365/3000 [30:43<3:19:38, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:20:25.846\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 366/3000 [30:47<3:15:52, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:20:30.111\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 367/3000 [30:51<3:12:55, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:20:34.356\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 368/3000 [30:57<3:34:34, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:20:40.402\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 369/3000 [31:02<3:32:30, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:20:45.142\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 12%|█▏ | 370/3000 [31:06<3:24:00, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:20:49.349\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 371/3000 [31:11<3:28:52, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:20:54.379\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 372/3000 [31:15<3:22:33, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:20:58.672\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 373/3000 [31:20<3:20:10, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:21:03.120\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▏ | 374/3000 [31:25<3:28:35, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:21:08.340\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 12%|█▎ | 375/3000 [31:31<3:38:59, 5.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:21:13.904\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 376/3000 [31:36<3:42:09, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:21:19.157\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 13%|█▎ | 377/3000 [31:41<3:44:40, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:21:24.436\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 378/3000 [31:46<3:44:05, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:21:29.537\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 379/3000 [31:51<3:32:47, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:21:33.809\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 380/3000 [31:56<3:43:46, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:21:39.525\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 381/3000 [32:06<4:39:14, 6.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:21:48.892\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 382/3000 [32:10<4:14:35, 5.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:21:53.414\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 383/3000 [32:15<3:59:30, 5.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:21:58.104\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 13%|█▎ | 384/3000 [32:20<3:47:47, 5.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:22:02.706\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 385/3000 [32:24<3:38:23, 5.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:22:07.218\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 386/3000 [32:29<3:31:46, 4.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:22:11.729\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 387/3000 [32:33<3:26:52, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:22:16.221\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 388/3000 [32:38<3:23:09, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:22:20.694\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 389/3000 [32:42<3:21:14, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:22:25.220\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 390/3000 [32:47<3:25:41, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:22:30.191\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 13%|█▎ | 391/3000 [32:52<3:23:14, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:22:34.738\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 392/3000 [32:56<3:19:27, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:22:39.127\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 393/3000 [33:00<3:12:15, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:22:43.169\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 394/3000 [33:04<3:06:18, 4.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:22:47.143\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 395/3000 [33:09<3:16:11, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:22:52.197\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 396/3000 [33:14<3:19:20, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:22:56.964\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 397/3000 [33:18<3:18:20, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:23:01.485\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 13%|█▎ | 398/3000 [33:23<3:18:16, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:23:06.058\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 399/3000 [33:28<3:20:23, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:23:10.798\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 400/3000 [33:33<3:27:38, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:23:15.986\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 401/3000 [33:37<3:25:59, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:23:20.656\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 402/3000 [33:42<3:23:59, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:23:25.263\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 403/3000 [33:48<3:36:49, 5.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:23:30.969\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 13%|█▎ | 404/3000 [33:53<3:41:15, 5.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:23:36.327\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 14%|█▎ | 405/3000 [33:58<3:40:20, 5.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:23:41.377\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▎ | 406/3000 [34:04<3:48:31, 5.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:23:47.108\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▎ | 407/3000 [34:09<3:45:07, 5.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:23:52.138\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▎ | 408/3000 [34:14<3:36:57, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:23:56.724\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▎ | 409/3000 [34:18<3:30:10, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:24:01.230\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▎ | 410/3000 [34:23<3:29:20, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:24:06.039\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▎ | 411/3000 [34:27<3:24:37, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:24:10.530\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 14%|█▎ | 412/3000 [34:31<3:10:29, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:24:14.186\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 413/3000 [34:35<3:05:35, 4.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:24:18.229\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 414/3000 [34:39<3:06:27, 4.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:24:22.607\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 415/3000 [34:44<3:08:22, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:24:27.087\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 416/3000 [34:48<3:08:20, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:24:31.462\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 417/3000 [34:53<3:12:58, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:24:36.199\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 418/3000 [34:58<3:17:35, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:24:41.046\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 14%|█▍ | 419/3000 [35:02<3:15:28, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:24:45.480\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 420/3000 [35:07<3:17:29, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:24:50.186\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 421/3000 [35:11<3:07:24, 4.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:24:54.002\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 422/3000 [35:16<3:14:06, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:24:58.888\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 423/3000 [35:21<3:27:00, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:25:04.412\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 424/3000 [35:26<3:32:12, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:25:09.642\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 425/3000 [35:31<3:29:50, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:25:14.407\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 14%|█▍ | 426/3000 [35:36<3:22:38, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:25:18.744\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 427/3000 [35:40<3:21:16, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:25:23.367\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 428/3000 [35:45<3:16:56, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:25:27.729\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 429/3000 [35:48<3:05:17, 4.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:25:31.424\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 430/3000 [35:53<3:10:59, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:25:36.197\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 431/3000 [35:58<3:16:58, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:25:41.128\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 432/3000 [36:03<3:18:01, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:25:45.815\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 14%|█▍ | 433/3000 [36:07<3:19:27, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:25:50.561\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 434/3000 [36:12<3:14:22, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:25:54.832\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 14%|█▍ | 435/3000 [36:16<3:08:22, 4.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:25:58.915\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▍ | 436/3000 [36:20<3:07:38, 4.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:26:03.270\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▍ | 437/3000 [36:25<3:11:18, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:26:07.953\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▍ | 438/3000 [36:29<3:11:34, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:26:12.458\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▍ | 439/3000 [36:35<3:23:19, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:26:17.868\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 15%|█▍ | 440/3000 [36:40<3:26:23, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:26:22.878\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▍ | 441/3000 [36:45<3:28:08, 4.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:26:27.858\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▍ | 442/3000 [36:49<3:26:24, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:26:32.609\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▍ | 443/3000 [36:54<3:28:51, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:26:37.648\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▍ | 444/3000 [36:59<3:19:48, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:26:41.848\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▍ | 445/3000 [37:03<3:09:42, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:26:45.753\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▍ | 446/3000 [37:07<3:06:13, 4.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:26:49.942\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 15%|█▍ | 447/3000 [37:13<3:23:38, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:26:55.686\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▍ | 448/3000 [37:17<3:19:06, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:27:00.124\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▍ | 449/3000 [37:22<3:18:08, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:27:04.735\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 450/3000 [37:26<3:21:10, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:27:09.640\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 451/3000 [37:32<3:29:44, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:27:15.052\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 452/3000 [37:36<3:24:09, 4.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:27:19.557\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 453/3000 [37:40<3:14:52, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:27:23.641\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 15%|█▌ | 454/3000 [37:45<3:12:39, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:27:28.064\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 455/3000 [37:50<3:17:15, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:27:32.972\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 456/3000 [37:55<3:21:18, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:27:37.946\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 457/3000 [38:00<3:27:50, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:27:43.214\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 458/3000 [38:04<3:20:34, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:27:47.553\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 459/3000 [38:09<3:22:04, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:27:52.411\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 460/3000 [38:13<3:10:40, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:27:56.291\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 15%|█▌ | 461/3000 [38:18<3:13:32, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:28:01.027\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 462/3000 [38:23<3:24:31, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:28:06.472\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 463/3000 [38:28<3:20:46, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:28:11.018\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 15%|█▌ | 464/3000 [38:32<3:18:17, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:28:15.577\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 465/3000 [38:37<3:10:55, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:28:19.694\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 466/3000 [38:42<3:18:25, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:28:24.811\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 467/3000 [38:46<3:16:26, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:28:29.358\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 16%|█▌ | 468/3000 [38:52<3:28:47, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:28:34.993\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 469/3000 [38:56<3:15:14, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:28:38.876\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 470/3000 [39:01<3:19:12, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:28:43.825\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 471/3000 [39:05<3:11:25, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:28:47.939\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 472/3000 [39:09<3:10:38, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:28:52.424\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 473/3000 [39:14<3:13:09, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:28:57.155\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 474/3000 [39:19<3:18:19, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:29:02.156\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 16%|█▌ | 475/3000 [39:23<3:12:30, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:29:06.412\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 476/3000 [39:28<3:18:10, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:29:11.441\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 477/3000 [39:33<3:18:55, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:29:16.218\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 478/3000 [39:38<3:16:36, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:29:20.772\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 479/3000 [39:42<3:15:17, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:29:25.351\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 480/3000 [39:46<3:10:21, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:29:29.614\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 481/3000 [39:51<3:08:16, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:29:33.986\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 16%|█▌ | 482/3000 [39:55<3:07:23, 4.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:29:38.406\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 483/3000 [40:00<3:15:43, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:29:43.541\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 484/3000 [40:05<3:10:42, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:29:47.814\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 485/3000 [40:09<3:03:05, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:29:51.762\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 486/3000 [40:13<3:06:23, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:29:56.398\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▌ | 487/3000 [40:17<2:57:53, 4.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:30:00.175\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▋ | 488/3000 [40:21<2:58:54, 4.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:30:04.510\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 16%|█▋ | 489/3000 [40:26<3:02:18, 4.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:30:09.059\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▋ | 490/3000 [40:31<3:07:06, 4.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:30:13.803\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▋ | 491/3000 [40:35<3:08:10, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:30:18.367\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▋ | 492/3000 [40:40<3:10:08, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:30:23.030\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▋ | 493/3000 [40:45<3:13:55, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:30:27.886\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▋ | 494/3000 [40:49<3:13:37, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:30:32.510\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 16%|█▋ | 495/3000 [40:54<3:11:31, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:30:36.985\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 17%|█▋ | 496/3000 [40:58<3:08:48, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:30:41.361\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 497/3000 [41:04<3:26:29, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:30:47.304\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 498/3000 [41:08<3:18:19, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:30:51.608\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 499/3000 [41:13<3:18:58, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:30:56.422\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 500/3000 [41:18<3:23:05, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:31:01.531\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 501/3000 [41:23<3:17:51, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:31:05.993\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 502/3000 [41:28<3:22:28, 4.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:31:11.119\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 17%|█▋ | 503/3000 [41:33<3:19:01, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:31:15.713\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 504/3000 [41:37<3:15:52, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:31:20.249\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 505/3000 [41:42<3:14:57, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:31:24.890\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 506/3000 [41:46<3:14:56, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:31:29.584\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 507/3000 [41:50<3:04:08, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:31:33.413\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 508/3000 [41:56<3:18:38, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:31:39.015\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 509/3000 [42:01<3:17:58, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:31:43.750\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 17%|█▋ | 510/3000 [42:06<3:28:23, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:31:49.362\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 511/3000 [42:11<3:24:40, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:31:54.092\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 512/3000 [42:16<3:29:59, 5.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:31:59.460\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 513/3000 [42:21<3:25:18, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:32:04.154\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 514/3000 [42:26<3:20:42, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:32:08.743\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 515/3000 [42:30<3:18:47, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:32:13.440\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 516/3000 [42:35<3:15:15, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:32:17.961\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 17%|█▋ | 517/3000 [42:40<3:22:29, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:32:23.266\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 518/3000 [42:45<3:27:26, 5.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:32:28.565\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 519/3000 [42:50<3:22:44, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:32:33.208\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 520/3000 [42:55<3:18:18, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:32:37.760\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 521/3000 [43:01<3:40:04, 5.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:32:44.321\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 522/3000 [43:05<3:27:51, 5.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:32:48.668\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 17%|█▋ | 523/3000 [43:10<3:23:49, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:32:53.382\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 17%|█▋ | 524/3000 [43:14<3:15:30, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:32:57.654\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 525/3000 [43:19<3:10:19, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:33:01.979\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 526/3000 [43:23<3:08:52, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:33:06.482\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 527/3000 [43:28<3:07:50, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:33:10.986\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 528/3000 [43:33<3:09:51, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:33:15.713\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 529/3000 [43:37<3:03:40, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:33:19.827\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 530/3000 [43:41<2:57:25, 4.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:33:23.787\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 18%|█▊ | 531/3000 [43:44<2:50:38, 4.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:33:27.552\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 532/3000 [43:49<2:58:44, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:33:32.361\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 533/3000 [43:54<3:05:43, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:33:37.279\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 534/3000 [43:59<3:12:37, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:33:42.362\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 535/3000 [44:04<3:11:28, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:33:46.961\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 536/3000 [44:09<3:17:26, 4.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:33:52.112\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 537/3000 [44:13<3:10:54, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:33:56.397\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 18%|█▊ | 538/3000 [44:18<3:13:06, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:34:01.232\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 539/3000 [44:23<3:14:29, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:34:06.057\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 540/3000 [44:27<3:09:24, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:34:10.391\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 541/3000 [44:33<3:22:12, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:34:16.058\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 542/3000 [44:38<3:27:19, 5.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:34:21.416\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 543/3000 [44:45<3:49:00, 5.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:34:28.248\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 544/3000 [44:52<4:00:09, 5.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:34:34.756\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 18%|█▊ | 545/3000 [44:57<3:56:12, 5.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:34:40.309\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 546/3000 [45:01<3:36:39, 5.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:34:44.497\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 547/3000 [45:06<3:27:34, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:34:49.061\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 548/3000 [45:11<3:23:11, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:34:53.787\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 549/3000 [45:15<3:15:25, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:34:58.133\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 550/3000 [45:20<3:19:39, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:35:03.268\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 551/3000 [45:24<3:11:10, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:35:07.473\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 18%|█▊ | 552/3000 [45:29<3:17:01, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:35:12.641\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 553/3000 [45:35<3:20:18, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:35:17.744\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 554/3000 [45:39<3:15:18, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:35:22.254\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 18%|█▊ | 555/3000 [45:45<3:26:36, 5.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:35:27.975\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▊ | 556/3000 [45:49<3:19:35, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:35:32.478\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▊ | 557/3000 [45:54<3:17:06, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:35:37.182\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▊ | 558/3000 [45:59<3:18:07, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:35:42.113\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 19%|█▊ | 559/3000 [46:03<3:06:32, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:35:46.038\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▊ | 560/3000 [46:07<2:59:21, 4.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:35:50.041\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▊ | 561/3000 [46:11<3:00:58, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:35:54.590\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▊ | 562/3000 [46:16<3:04:56, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:35:59.374\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 563/3000 [46:21<3:06:09, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:36:04.031\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 564/3000 [46:25<2:58:33, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:36:07.997\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 565/3000 [46:30<3:08:45, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:36:13.238\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 19%|█▉ | 566/3000 [46:36<3:21:56, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:36:18.979\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 567/3000 [46:41<3:25:12, 5.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:36:24.232\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 568/3000 [46:46<3:29:31, 5.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:36:29.655\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 569/3000 [46:52<3:28:03, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:36:34.711\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 570/3000 [46:56<3:22:37, 5.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:36:39.405\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 571/3000 [47:00<3:09:38, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:36:43.347\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 572/3000 [47:05<3:15:55, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:36:48.555\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 19%|█▉ | 573/3000 [47:10<3:14:08, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:36:53.256\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 574/3000 [47:15<3:11:33, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:36:57.850\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 575/3000 [47:19<3:08:44, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:37:02.361\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 576/3000 [47:23<3:03:15, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:37:06.584\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 577/3000 [47:28<3:03:35, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:37:11.155\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 578/3000 [47:33<3:13:22, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:37:16.515\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 579/3000 [47:40<3:34:52, 5.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:37:23.088\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "\r", "Evaluating workflow: 19%|█▉ | 580/3000 [47:45<3:26:17, 5.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:37:27.711\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 581/3000 [47:50<3:25:08, 5.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:37:32.737\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 582/3000 [47:54<3:20:49, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:37:37.476\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 583/3000 [47:59<3:17:06, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:37:42.159\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 19%|█▉ | 584/3000 [48:04<3:20:08, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:37:47.310\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 585/3000 [48:09<3:14:56, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:37:51.856\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 586/3000 [48:14<3:17:14, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:37:56.897\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637736.336035507)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637737.07862541)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637737.854351916)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637738.687100533)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637740.247993674)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637741.002977205)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637741.728966857)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637742.46399642)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637743.249611905)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637743.943389603)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637744.810078655)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637745.595790385)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637746.489196623)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1637747.544788874)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637748.370157957)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637750.2381142)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637751.026518242)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637751.969951529)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637753.497799357)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637754.3724817)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637749.433265819)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637752.693476598)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637755.116643976)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637755.872260009)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637756.660705017)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637757.367747942)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637758.286863609)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", 
"connections: ['deque([(, 1637759.262443108)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637760.076089914)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637760.94867924)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637761.732773379)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637762.590942182)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637763.996541975)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637764.8024088)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637765.819671452)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637767.387544688)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637763.254490203)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637766.629867729)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637768.182407374)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637769.107222574)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637771.256608669)])']\n", 
"connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637772.277522556)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637783.041392003)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637784.053643378)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637785.757134918)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637786.593587474)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637787.663547051)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637789.385090457)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637770.507752487)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637784.876130733)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637788.586101303)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637790.169744267)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637790.938623944)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637793.504587771)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: 
['deque([(, 1637794.37963168)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637795.843256169)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637796.481855378)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637797.280757692)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637799.120918186)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637799.986056149)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637800.799277444)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637802.653332619)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637791.881573811)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637792.634115723)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637795.133098213)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637798.11832426)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637801.528113129)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637803.286359227)])']\n", "connector: \n", 
"Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637803.992788746)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637805.446159254)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637806.178547074)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637807.238839814)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637807.973529144)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637808.699449494)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637809.814061077)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637810.71579754)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637811.442988163)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637812.911542634)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637813.610629191)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637814.311959012)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637812.086213893)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed 
client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637804.634154512)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637815.299085656)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637816.114452152)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637816.938398359)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637817.640329087)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637818.441324452)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637819.263756299)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637820.394602568)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637821.337668342)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637822.703229178)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637824.234580493)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637840.280549852)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637823.477534245)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637841.163422054)])']\n", "connector: \n", "Unclosed client 
session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637843.711219096)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637844.861975883)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637847.418269775)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637848.250150296)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637849.222315357)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637850.847114197)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637851.762079174)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637852.447194235)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637853.50633261)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637854.411304)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637855.136941662)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637842.801399776)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637846.458233142)])']\n", "connector: \n", "Unclosed connector\n", 
"connections: ['deque([(, 1637849.930308125)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637855.931253833)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637856.607543801)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637857.322806026)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637858.122812291)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637858.811307546)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637859.575680397)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637861.156029076)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637861.886876617)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637862.547349884)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637863.395981275)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637864.914822757)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637865.585806191)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637866.358397969)])']\n", "connector: \n", "Unclosed client session\n", 
"client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637867.977235353)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637864.155683667)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637867.008636076)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637868.807373667)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637869.552233578)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637871.124802828)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637871.762862637)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637872.507591222)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637873.351309789)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637874.138073675)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637874.944604679)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637875.652682752)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637876.557288239)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637878.110276846)])']\n", "connector: 
\n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637879.126791809)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637879.98106333)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637877.362678018)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1637870.352930474)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637880.663669366)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637881.399382416)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637892.136122543)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637892.90095094)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637893.636375178)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637894.669167273)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637902.618606062)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637903.303108092)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637904.143133412)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", 
"Unclosed connector\n", "connections: ['deque([(, 1637906.072853032)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637906.741461485)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637907.502671838)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637908.229559425)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637909.000079467)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637909.717765262)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637910.363056337)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637911.236973403)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637912.222535245)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637912.880866531)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637913.664347687)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637916.402290899)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637917.137876101)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637917.968741444)])']\n", "connector: \n", "Unclosed client 
session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637918.885155001)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637920.430979141)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637921.305042268)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637922.000311264)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637923.461717441)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637919.700299923)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637922.727734978)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637924.211604591)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637924.959268032)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637926.819348239)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637927.45996116)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637928.277698046)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637929.05432485)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", 
"connections: ['deque([(, 1637929.889933841)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637930.792751752)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637932.121756094)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637932.899227242)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637934.560707633)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637935.25107532)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637935.959870673)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637933.787013638)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637926.061020309)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637936.929405142)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637937.706403658)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637938.493334194)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637939.19973708)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637940.246367109)])']\n", "connector: \n", "Unclosed client session\n", "client_session: 
\n", "Unclosed connector\n", "connections: ['deque([(, 1637941.350210412)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637942.193371023)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637946.544986873)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637955.748498702)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637965.421515092)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637960.760865826)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637951.348619276)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637841.993887279)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1637969.835950043)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637975.007599362)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637979.828484907)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637989.647380229)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637994.229362387)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1637998.634975435)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638003.131060133)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1637985.210590143)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638007.665922786)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638012.426560083)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638017.783686736)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638022.148579119)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638026.483578423)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638035.565266941)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638045.961769401)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638030.718425275)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638041.243932745)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638050.25875767)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638055.525154539)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed 
connector\n", "connections: ['deque([(, 1638060.494574307)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638065.95781989)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638070.054566805)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638081.492830956)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638085.93622224)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638090.237660107)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638074.227803279)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638094.677554643)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638100.165975069)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638104.467506188)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638108.740747505)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638118.43776449)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638128.371652867)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1638134.036937179)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638113.471063366)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638123.878697145)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638139.044999955)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638143.729772041)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638168.220978621)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638177.584928914)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638186.66325461)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638191.122839913)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638163.878422451)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638172.764571525)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638182.523205949)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638200.507079503)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638205.271266614)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed 
connector\n", "connections: ['deque([(, 1638210.280212033)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638219.348362706)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638215.017844413)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638226.588897876)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638195.670987358)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638231.208013655)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638236.541325661)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638240.938740873)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638245.431681664)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638254.867127891)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638264.037453346)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638268.288094424)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638250.510923725)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 
1638259.695781162)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638272.208500089)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638281.472700186)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638290.303049615)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638300.156576969)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638277.141074443)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638286.131137592)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638295.514938064)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638304.994813968)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638309.554252352)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638318.569777425)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638329.653797551)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638338.52719537)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638344.569671064)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client 
session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638314.493165022)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638323.814592644)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638333.51780703)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638349.057921451)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638357.901580262)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638368.533020504)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638377.782481849)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638382.368251468)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1638353.882163992)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638363.354099053)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638373.294990447)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638387.747813246)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638397.288234387)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1638406.041045268)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638414.6184874)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638419.437077781)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638392.379299515)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638401.832420501)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638410.033503658)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638423.613278962)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638431.960385044)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638442.140773506)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638450.605220331)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638454.858870026)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638427.961550864)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638436.926174636)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638446.321734945)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed 
connector\n", "connections: ['deque([(, 1638459.130444282)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638468.15430868)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638477.445125816)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638486.348758328)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638491.573633285)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638463.609082617)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638472.542041499)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638481.588105244)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638496.063606546)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638505.038516269)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638514.486089941)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638522.901271602)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638527.465117052)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed 
client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638500.44014659)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638509.883273615)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638518.741203879)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638531.951669012)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638541.535247788)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638552.280733639)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638561.059667259)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638565.240407752)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638536.326662448)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638546.353668091)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638556.847354813)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638569.978024362)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638579.857898589)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638590.826419328)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", 
"connections: ['deque([(, 1638599.869379489)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638605.619882511)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638575.036286651)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638585.452822729)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638595.37270023)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638610.768941449)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638621.739570335)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638631.059506508)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638641.107194908)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638645.144992103)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638617.011540946)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1638626.396783192)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638635.859283222)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638649.761147327)])']\n", 
"connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638659.385964144)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638668.661027876)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638767.356948988)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638654.602718071)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638664.4471251)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638762.839977411)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638771.675689962)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638776.114219689)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638785.330506751)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638793.905667445)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638804.897742688)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638810.033240829)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638780.243514631)])']\n", "connector: 
\n", "Unclosed connector\n", "connections: ['deque([(, 1638789.771334627)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638799.073786457)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638816.247926852)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638825.926184984)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638835.638378805)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638845.156982289)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638849.773774176)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638821.519369991)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638840.64991393)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638830.240402833)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638854.646388228)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638863.916073092)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1638874.098387602)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638883.148748977)])']\n", "connector: \n", 
"Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638869.047825639)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638887.393681598)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638859.409346004)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638878.629298872)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638892.594242943)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638902.200473395)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638912.828495268)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638922.321405654)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638927.560228524)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638896.953938344)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638907.575182886)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638917.607278944)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638932.433250107)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1638942.591390419)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638953.363886397)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638962.521430736)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638968.534615267)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638937.638953207)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638948.603895133)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638957.768658246)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638972.7865025)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638977.790757138)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638987.038171639)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1638996.497657621)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639007.750058574)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639017.05574988)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 
1638982.792346796)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1638992.288326852)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639001.385242025)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639031.058651675)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639035.588508639)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639045.551651012)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639040.550048874)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639021.498565459)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639026.552743625)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639050.150422694)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639054.460558102)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639059.026580205)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639064.478037788)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639070.457199529)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1639080.246052683)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639090.847695655)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639095.783190319)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639075.648777405)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639085.039314277)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639100.615735804)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639110.010318201)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639118.653033993)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639127.624791764)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639132.771353882)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639105.306447288)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639114.580935547)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639123.235052281)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639138.482128992)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed 
connector\n", "connections: ['deque([(, 1639146.499636528)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639157.171021471)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639166.328376772)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639172.772096159)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639142.367770719)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639152.195853106)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639161.929725758)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639177.655515603)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639187.935834002)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639197.785551749)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639208.577461632)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639213.294801447)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639182.924044885)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 
1639192.861827605)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639202.399874909)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639217.925192159)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639226.882436878)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639236.258868146)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639247.173045306)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639251.913966151)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639222.452312725)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639231.6678201)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639241.510926878)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639256.393761281)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639265.14516387)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639274.11143154)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639282.481987781)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", 
"connections: ['deque([(, 1639286.58033602)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1639260.829191433)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639270.1441239)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639278.623187794)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639291.119132905)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639300.382346078)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639310.008922638)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639318.944391576)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639323.625228574)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639304.951234771)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639314.097290347)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639295.500828399)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639327.806347527)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639336.755923358)])']\n", 
"connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639345.759654707)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639355.01660924)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639360.37533671)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639332.690768078)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639341.055904067)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639350.393526443)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639364.413244246)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639372.81916326)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639381.224766172)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639390.820736716)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639395.574068315)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639368.796468934)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639377.078877054)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 
1639385.781691221)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639400.09187289)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639411.110552784)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639421.052663246)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639430.088527599)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639435.041902085)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639406.222676181)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639416.514584238)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639425.625132083)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639439.863470373)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639449.027470059)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639457.527215682)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639467.177227284)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", 
"Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639444.414632159)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639453.090878193)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639462.564770894)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639472.319428877)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639477.006284777)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639486.269085769)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639495.323167761)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639504.97726426)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639509.726868515)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639481.349188246)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639490.588445746)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639499.900768687)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639514.557665582)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639526.867346647)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1639536.675978063)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639545.869193504)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639550.390430006)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639522.717876145)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1639541.456511492)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639532.477611722)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639555.480185899)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639565.300836509)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639575.037619865)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639584.469435192)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639589.295789247)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639560.031144348)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639569.921664984)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639579.950858325)])']\n", "connector: \n", "Unclosed 
client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639593.948147615)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639603.168673292)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639612.868699152)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639622.11050514)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639626.938328008)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639598.446790935)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639608.189597301)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639617.623926553)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639631.552111489)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639640.726831332)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639649.240403321)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639658.786384577)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1639663.13278202)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639636.028102449)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639644.914194592)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639653.183304205)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639667.336702678)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639676.416388095)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1639685.614963033)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639694.38370753)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639698.692624859)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639671.507792119)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639680.993792718)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639690.335791388)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639703.246212289)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639711.849799589)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639721.185516469)])']\n", "connector: \n", "Unclosed client 
session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639730.204808799)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639734.929167573)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639707.581454425)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639716.6195822)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639725.94363572)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639740.038817898)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639748.563770152)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639757.517805037)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639767.333638793)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639771.881086759)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639744.070586612)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639752.903088098)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639762.346440843)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639776.36496384)])']\n", 
"connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639785.372903064)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639793.88207628)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639804.668842095)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639808.875595425)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639780.990050057)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1639789.637930469)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639799.928432395)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639813.905056124)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639822.646544627)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639833.430922657)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639843.962381985)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639849.063456276)])']\n", "connector: \n", "Unclosed connector\n", 
"connections: ['deque([(, 1639818.198458329)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639827.866282758)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639838.683754513)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639853.335129166)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639868.418418262)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639877.630108251)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639886.744633631)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639891.25541864)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639859.05185506)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639872.940683017)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639882.232488739)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639895.747966942)])']\n", "connector: \n", "Evaluating workflow: 20%|█▉ | 587/3000 [48:19<3:23:46, 5.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:38:02.348\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated 
nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 588/3000 [48:23<3:07:53, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:38:06.105\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 589/3000 [48:28<3:09:11, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:38:10.893\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 590/3000 [48:32<2:59:04, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:38:14.769\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 591/3000 [48:36<3:01:55, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
1.0}\n", "\u001b[32m2026-01-05 10:38:19.469\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 592/3000 [48:40<2:52:48, 4.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:38:23.249\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 593/3000 [48:46<3:17:25, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:38:29.607\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 594/3000 [48:51<3:14:13, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:38:34.269\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 595/3000 [48:55<3:07:34, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:38:38.566\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 596/3000 [49:01<3:15:54, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:38:43.946\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 597/3000 [49:05<3:05:01, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:38:47.936\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 598/3000 [49:09<3:04:18, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", 
"\u001b[32m2026-01-05 10:38:52.503\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|█▉ | 599/3000 [49:14<3:08:09, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:38:57.434\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 600/3000 [49:19<3:03:35, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:39:01.762\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 601/3000 [49:23<2:59:48, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:39:06.043\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 602/3000 [49:29<3:17:58, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:39:12.062\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 603/3000 [49:37<3:54:22, 5.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:39:20.059\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 604/3000 [49:42<3:40:00, 5.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:39:24.735\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 605/3000 [49:46<3:28:31, 5.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", 
"\u001b[32m2026-01-05 10:39:29.292\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 606/3000 [49:51<3:19:05, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:39:33.735\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 607/3000 [49:55<3:15:48, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:39:38.459\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 608/3000 [50:00<3:09:55, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:39:42.882\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 609/3000 [50:04<3:03:48, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:39:47.142\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 610/3000 [50:08<3:01:03, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:39:51.531\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 611/3000 [50:13<3:06:31, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:39:56.540\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 612/3000 [50:18<3:06:00, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", 
"\u001b[32m2026-01-05 10:40:01.188\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 613/3000 [50:22<2:58:53, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:40:05.271\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 614/3000 [50:27<3:03:01, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:40:10.121\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 20%|██ | 615/3000 [50:32<3:02:38, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:40:14.698\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 616/3000 [50:38<3:24:06, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:40:21.100\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 617/3000 [50:42<3:13:16, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:40:25.335\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 618/3000 [50:46<3:06:39, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:40:29.652\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 619/3000 [50:51<3:09:29, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", 
"\u001b[32m2026-01-05 10:40:34.599\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 620/3000 [50:56<3:07:43, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:40:39.233\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 621/3000 [51:01<3:12:55, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:40:44.409\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 622/3000 [51:06<3:09:12, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:40:48.968\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 623/3000 [51:11<3:14:31, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:40:54.197\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 624/3000 [51:18<3:39:58, 5.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:41:01.256\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 625/3000 [51:22<3:21:58, 5.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:41:05.303\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 626/3000 [51:26<3:09:21, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", 
"\u001b[32m2026-01-05 10:41:09.349\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 627/3000 [51:31<3:09:00, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:41:14.112\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 628/3000 [51:35<3:06:24, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:41:18.678\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 629/3000 [51:41<3:11:20, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:41:23.816\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 630/3000 [51:45<3:08:10, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:41:28.398\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 631/3000 [52:00<5:08:19, 7.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:41:43.312\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 632/3000 [52:04<4:25:37, 6.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:41:47.525\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 633/3000 [52:09<3:57:38, 6.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", 
"\u001b[32m2026-01-05 10:41:51.902\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 634/3000 [52:14<3:49:41, 5.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:41:57.262\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 635/3000 [52:18<3:30:29, 5.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:42:01.471\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 636/3000 [52:23<3:23:38, 5.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:42:06.240\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██ | 637/3000 [52:29<3:37:19, 5.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:42:12.574\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██▏ | 638/3000 [52:35<3:33:30, 5.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:42:17.776\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██▏ | 639/3000 [52:39<3:16:29, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:42:21.767\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██▏ | 640/3000 [52:42<3:02:35, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", 
"\u001b[32m2026-01-05 10:42:25.588\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██▏ | 641/3000 [52:47<3:00:51, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:42:30.091\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██▏ | 642/3000 [52:51<2:59:56, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:42:34.619\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██▏ | 643/3000 [52:56<2:57:53, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:42:39.030\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 21%|██▏ | 644/3000 [53:00<2:57:43, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:42:43.551\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 645/3000 [53:05<2:54:16, 4.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:42:47.791\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 646/3000 [53:10<3:06:38, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:42:53.288\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 647/3000 [53:15<3:11:03, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", 
"\u001b[32m2026-01-05 10:42:58.428\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 648/3000 [53:19<3:03:30, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:43:02.663\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 649/3000 [53:25<3:16:26, 5.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:43:08.453\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 650/3000 [53:31<3:19:10, 5.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:43:13.706\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 651/3000 [53:37<3:34:35, 5.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:43:20.111\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 652/3000 [53:42<3:25:42, 5.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:43:24.843\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 653/3000 [53:46<3:17:15, 5.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:43:29.387\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 654/3000 [53:51<3:13:30, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", 
"\u001b[32m2026-01-05 10:43:34.117\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 655/3000 [53:55<3:04:52, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:43:38.336\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 656/3000 [54:00<3:04:07, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:43:43.010\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 657/3000 [54:05<3:06:48, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:43:47.958\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 658/3000 [54:09<2:59:22, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:43:52.115\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 659/3000 [54:13<2:57:17, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:43:56.538\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 660/3000 [54:18<2:57:44, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:44:01.128\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 661/3000 [54:23<2:57:41, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", 
"\u001b[32m2026-01-05 10:44:05.687\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 662/3000 [54:28<3:07:52, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:44:11.122\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 663/3000 [54:33<3:08:31, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:44:16.007\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 664/3000 [54:37<3:03:36, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:44:20.433\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 665/3000 [54:42<3:05:02, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:44:25.279\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 666/3000 [54:47<3:03:48, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:44:29.935\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 667/3000 [54:51<2:58:40, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:44:34.227\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 668/3000 [54:56<2:59:32, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", 
"\u001b[32m2026-01-05 10:44:38.903\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 669/3000 [55:01<3:07:24, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:44:44.204\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 670/3000 [55:06<3:06:06, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:44:48.922\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 671/3000 [55:11<3:06:41, 4.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:44:53.772\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 672/3000 [55:15<3:04:40, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:44:58.416\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 673/3000 [55:20<3:02:13, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:45:02.971\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▏ | 674/3000 [55:24<2:52:02, 4.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:45:06.801\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 22%|██▎ | 675/3000 [55:28<2:55:44, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", 
"\u001b[32m2026-01-05 10:45:11.564\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 676/3000 [55:33<3:01:11, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:45:16.574\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 677/3000 [55:38<3:02:24, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:45:21.364\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 678/3000 [55:43<3:01:09, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:45:25.974\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 679/3000 [55:47<2:58:55, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:45:30.469\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 680/3000 [55:52<2:59:19, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:45:35.136\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 681/3000 [55:57<2:58:47, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:45:39.734\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 682/3000 [56:02<3:02:47, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", 
"\u001b[32m2026-01-05 10:45:44.712\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 683/3000 [56:06<3:04:56, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:45:49.636\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 684/3000 [56:11<3:03:41, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:45:54.324\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 685/3000 [56:16<3:04:30, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:45:59.160\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 686/3000 [56:21<3:07:39, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:46:04.221\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 687/3000 [56:25<2:56:57, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:46:08.169\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 688/3000 [56:29<2:51:55, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:46:12.331\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 689/3000 [56:33<2:49:30, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", 
"\u001b[32m2026-01-05 10:46:16.590\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 690/3000 [56:39<2:58:19, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:46:21.760\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 691/3000 [56:43<2:54:31, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:46:26.070\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 692/3000 [56:47<2:52:08, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:46:30.405\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 693/3000 [56:52<2:51:27, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:46:34.827\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 694/3000 [56:57<2:57:04, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:46:39.781\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 695/3000 [57:01<2:59:53, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:46:44.639\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 696/3000 [57:06<3:00:43, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", 
"\u001b[32m2026-01-05 10:46:49.400\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 697/3000 [57:11<3:04:27, 4.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:46:54.438\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 698/3000 [57:16<3:03:45, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:46:59.190\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 699/3000 [57:20<2:56:14, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:47:03.333\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 700/3000 [57:25<3:04:45, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:47:08.677\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 701/3000 [57:30<3:04:33, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:47:13.486\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 702/3000 [57:35<2:59:07, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:47:17.837\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 703/3000 [57:39<2:56:43, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", 
"\u001b[32m2026-01-05 10:47:22.311\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 23%|██▎ | 704/3000 [57:43<2:46:36, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:47:26.053\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▎ | 705/3000 [57:48<2:55:32, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:47:31.192\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▎ | 706/3000 [57:52<2:53:13, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:47:35.587\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▎ | 707/3000 [57:58<2:59:50, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:47:40.701\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▎ | 708/3000 [58:02<2:54:05, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:47:44.912\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▎ | 709/3000 [58:06<2:51:06, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:47:49.215\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▎ | 710/3000 [58:11<2:58:54, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", 
"\u001b[32m2026-01-05 10:47:54.383\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▎ | 711/3000 [58:16<2:54:54, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:47:58.728\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▎ | 712/3000 [58:20<2:49:13, 4.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:48:02.824\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 713/3000 [58:24<2:49:56, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:48:07.329\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 714/3000 [58:29<2:51:36, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:48:11.941\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 715/3000 [58:33<2:45:13, 4.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:48:15.893\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 716/3000 [58:37<2:48:50, 4.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:48:20.554\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 717/3000 [58:42<2:52:13, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", 
"\u001b[32m2026-01-05 10:48:25.292\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 718/3000 [58:47<2:54:50, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:48:30.054\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 719/3000 [58:52<2:56:24, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:48:34.795\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 720/3000 [58:56<2:57:08, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:48:39.507\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 721/3000 [59:02<3:05:47, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:48:44.934\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 722/3000 [59:07<3:11:35, 5.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:48:50.343\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 723/3000 [59:12<3:09:48, 5.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:48:55.240\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 724/3000 [59:16<3:00:28, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", 
"\u001b[32m2026-01-05 10:48:59.429\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 725/3000 [59:21<3:00:47, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:49:04.221\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 726/3000 [59:26<3:03:33, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:49:09.239\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 727/3000 [59:31<3:03:32, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:49:14.089\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 728/3000 [59:36<3:04:30, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:49:19.026\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 729/3000 [59:42<3:16:27, 5.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:49:24.957\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 730/3000 [59:47<3:14:27, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:49:29.979\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 731/3000 [59:56<4:03:32, 6.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", 
"\u001b[32m2026-01-05 10:49:39.453\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 732/3000 [1:00:01<3:41:03, 5.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:49:43.919\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 733/3000 [1:00:05<3:24:23, 5.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:49:48.306\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 734/3000 [1:00:09<3:09:12, 5.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:49:52.383\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 24%|██▍ | 735/3000 [1:00:15<3:15:03, 5.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:49:57.917\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ | 736/3000 [1:00:19<3:03:25, 4.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:50:02.065\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ | 737/3000 [1:00:23<2:57:54, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:50:06.444\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ | 738/3000 [1:00:27<2:51:06, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", 
"\u001b[32m2026-01-05 10:50:10.568\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ | 739/3000 [1:00:32<2:49:08, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:50:14.939\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ | 740/3000 [1:00:36<2:48:14, 4.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:50:19.354\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ | 741/3000 [1:00:41<2:53:37, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:50:24.304\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ | 742/3000 [1:00:46<2:58:48, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:50:29.381\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ | 743/3000 [1:00:52<3:05:31, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:50:34.735\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ | 744/3000 [1:00:55<2:54:18, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:50:38.680\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ | 745/3000 [1:01:00<2:53:44, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", 
"\u001b[32m2026-01-05 10:50:43.273\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ | 746/3000 [1:01:04<2:43:21, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:50:46.982\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ | 747/3000 [1:01:08<2:39:47, 4.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:50:51.019\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ | 748/3000 [1:01:13<2:48:00, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:50:56.011\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▍ | 749/3000 [1:01:17<2:49:05, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:51:00.590\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 750/3000 [1:01:21<2:43:45, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:51:04.629\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 751/3000 [1:01:26<2:41:17, 4.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:51:08.783\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 752/3000 [1:01:30<2:37:28, 4.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", 
"\u001b[32m2026-01-05 10:51:12.753\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 753/3000 [1:01:34<2:43:07, 4.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:51:17.465\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 754/3000 [1:01:40<2:58:39, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:51:23.211\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 755/3000 [1:01:44<2:54:06, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:51:27.585\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 756/3000 [1:01:49<2:48:58, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:51:31.788\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 757/3000 [1:01:54<3:03:19, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:51:37.593\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 758/3000 [1:02:00<3:09:49, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:51:43.084\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 759/3000 [1:02:05<3:08:03, 5.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", 
"\u001b[32m2026-01-05 10:51:48.013\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 760/3000 [1:02:10<3:06:27, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:51:52.913\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 761/3000 [1:02:14<3:02:02, 4.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:51:57.521\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 762/3000 [1:02:18<2:51:14, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:52:01.440\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 763/3000 [1:02:25<3:11:08, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:52:07.818\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 25%|██▌ | 764/3000 [1:02:29<3:07:03, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:52:12.587\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 765/3000 [1:02:34<3:01:01, 4.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:52:17.074\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 766/3000 [1:02:38<2:54:41, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", 
"\u001b[32m2026-01-05 10:52:21.374\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 767/3000 [1:02:43<2:55:53, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:52:26.180\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 768/3000 [1:02:48<3:02:56, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:52:31.544\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 769/3000 [1:02:53<2:54:59, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:52:35.758\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 770/3000 [1:02:58<3:06:55, 5.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:52:41.541\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 771/3000 [1:03:03<3:03:50, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:52:46.301\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 772/3000 [1:03:07<2:56:27, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:52:50.595\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 773/3000 [1:03:13<3:02:20, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", 
"\u001b[32m2026-01-05 10:52:55.882\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 774/3000 [1:03:17<3:00:34, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:53:00.643\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 775/3000 [1:03:22<2:54:33, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:53:04.977\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 776/3000 [1:03:27<3:05:14, 5.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:53:10.651\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 777/3000 [1:03:32<3:01:25, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:53:15.314\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 778/3000 [1:03:37<3:05:05, 5.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:53:20.547\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 779/3000 [1:03:42<3:01:52, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:53:25.264\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 780/3000 [1:03:47<3:05:50, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", 
"\u001b[32m2026-01-05 10:53:30.542\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 781/3000 [1:03:52<2:59:58, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:53:35.044\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 782/3000 [1:03:57<3:00:55, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:53:40.003\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 783/3000 [1:04:01<2:53:29, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:53:44.234\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 784/3000 [1:04:06<2:51:37, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:53:48.768\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 785/3000 [1:04:10<2:47:17, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:53:53.030\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 786/3000 [1:04:15<2:55:48, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:53:58.339\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▌ | 787/3000 [1:04:20<3:01:19, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", 
"\u001b[32m2026-01-05 10:54:03.609\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▋ | 788/3000 [1:04:25<2:53:35, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:54:07.833\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▋ | 789/3000 [1:04:29<2:54:58, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:54:12.674\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▋ | 790/3000 [1:04:34<2:56:12, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:54:17.540\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▋ | 791/3000 [1:04:39<2:51:30, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:54:21.906\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▋ | 792/3000 [1:04:43<2:48:10, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:54:26.269\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▋ | 793/3000 [1:04:48<2:50:29, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:54:31.056\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▋ | 794/3000 [1:04:53<2:56:54, 4.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", 
"\u001b[32m2026-01-05 10:54:36.280\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 26%|██▋ | 795/3000 [1:04:58<2:58:51, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:54:41.276\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 796/3000 [1:05:03<2:56:28, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:54:45.934\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 797/3000 [1:05:07<2:51:43, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:54:50.314\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 798/3000 [1:05:12<2:48:23, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:54:54.696\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 799/3000 [1:05:17<2:57:04, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:55:00.080\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 800/3000 [1:05:21<2:49:51, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:55:04.258\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 801/3000 [1:05:26<2:52:50, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", 
"\u001b[32m2026-01-05 10:55:09.169\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 802/3000 [1:05:31<2:51:57, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:55:13.812\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 803/3000 [1:05:35<2:50:18, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:55:18.363\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 804/3000 [1:05:40<2:46:37, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:55:22.685\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 805/3000 [1:05:45<2:52:10, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:55:27.751\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 806/3000 [1:05:49<2:48:09, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:55:32.098\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 807/3000 [1:05:54<2:53:34, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:55:37.198\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 808/3000 [1:05:59<2:53:31, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", 
"\u001b[32m2026-01-05 10:55:41.950\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 809/3000 [1:06:04<2:54:40, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:55:46.811\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 810/3000 [1:06:08<2:47:04, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:55:50.907\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 811/3000 [1:06:12<2:41:54, 4.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:55:55.020\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 812/3000 [1:06:17<2:46:44, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:55:59.907\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 813/3000 [1:06:22<2:48:55, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:56:04.686\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 814/3000 [1:06:26<2:42:54, 4.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:56:08.777\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 815/3000 [1:06:30<2:38:28, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", 
"\u001b[32m2026-01-05 10:56:12.850\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 816/3000 [1:06:34<2:37:00, 4.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:56:17.074\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 817/3000 [1:06:39<2:44:19, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:56:22.065\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 818/3000 [1:06:44<2:46:25, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:56:26.781\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 819/3000 [1:06:48<2:43:41, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:56:31.113\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 820/3000 [1:06:52<2:43:54, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:56:35.643\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 821/3000 [1:06:57<2:43:09, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:56:40.093\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 822/3000 [1:07:02<2:54:12, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", 
"\u001b[32m2026-01-05 10:56:45.607\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 823/3000 [1:07:07<2:54:14, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:56:50.417\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 27%|██▋ | 824/3000 [1:07:11<2:46:50, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:56:54.545\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 825/3000 [1:07:16<2:42:44, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:56:58.776\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 826/3000 [1:07:20<2:40:36, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:57:03.077\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 827/3000 [1:07:24<2:39:23, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:57:07.403\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 828/3000 [1:07:29<2:38:25, 4.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:57:11.722\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 829/3000 [1:07:33<2:36:54, 4.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", 
"\u001b[32m2026-01-05 10:57:15.966\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 830/3000 [1:07:38<2:42:34, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:57:20.831\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 831/3000 [1:07:43<2:49:42, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:57:25.992\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 832/3000 [1:07:47<2:45:28, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:57:30.302\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 833/3000 [1:07:52<2:48:41, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:57:35.186\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 834/3000 [1:07:57<2:49:30, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:57:39.939\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 835/3000 [1:08:02<2:56:10, 4.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:57:45.258\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 836/3000 [1:08:07<2:54:07, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", 
"\u001b[32m2026-01-05 10:57:49.959\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 837/3000 [1:08:12<2:55:02, 4.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:57:54.878\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 838/3000 [1:08:16<2:48:26, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:57:59.131\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 839/3000 [1:08:21<2:53:34, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:58:04.288\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 840/3000 [1:08:26<2:58:55, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:58:09.610\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 841/3000 [1:08:31<2:52:27, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:58:13.989\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 842/3000 [1:08:35<2:48:36, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:58:18.432\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 843/3000 [1:08:40<2:53:51, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", 
"\u001b[32m2026-01-05 10:58:23.614\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 844/3000 [1:08:45<2:48:01, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:58:27.917\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 845/3000 [1:08:49<2:45:41, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:58:32.383\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 846/3000 [1:08:54<2:44:17, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:58:36.873\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 847/3000 [1:08:58<2:40:12, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:58:41.077\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 848/3000 [1:09:03<2:43:58, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:58:45.899\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 849/3000 [1:09:07<2:42:05, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:58:50.303\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 850/3000 [1:09:11<2:38:59, 4.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", 
"\u001b[32m2026-01-05 10:58:54.543\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 851/3000 [1:09:16<2:40:26, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:58:59.122\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 852/3000 [1:09:20<2:40:43, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:59:03.634\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 853/3000 [1:09:25<2:45:44, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:59:08.598\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 854/3000 [1:09:31<2:52:02, 4.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:59:13.824\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 28%|██▊ | 855/3000 [1:09:35<2:46:56, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:59:18.166\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▊ | 856/3000 [1:09:40<2:47:59, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:59:22.941\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▊ | 857/3000 [1:09:44<2:45:48, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", 
"\u001b[32m2026-01-05 10:59:27.447\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▊ | 858/3000 [1:09:49<2:47:33, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:59:32.259\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▊ | 859/3000 [1:09:53<2:43:47, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:59:36.608\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▊ | 860/3000 [1:09:58<2:45:16, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:59:41.344\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▊ | 861/3000 [1:10:02<2:40:51, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 10:59:45.572\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▊ | 862/3000 [1:10:07<2:40:58, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:59:50.102\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 863/3000 [1:10:12<2:42:52, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 10:59:54.805\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 864/3000 [1:10:16<2:40:18, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", 
"\u001b[32m2026-01-05 10:59:59.145\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 865/3000 [1:10:21<2:42:02, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:00:03.818\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 866/3000 [1:10:25<2:43:25, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:00:08.507\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 867/3000 [1:10:30<2:45:45, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:00:13.328\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 868/3000 [1:10:35<2:45:04, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:00:17.934\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 869/3000 [1:10:39<2:43:06, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:00:22.403\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 870/3000 [1:10:44<2:45:56, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:00:27.269\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 871/3000 [1:10:50<2:54:54, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", 
"\u001b[32m2026-01-05 11:00:32.792\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 872/3000 [1:10:55<2:59:25, 5.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:00:38.154\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 873/3000 [1:11:01<3:13:13, 5.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:00:44.519\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 874/3000 [1:11:06<3:00:02, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:00:48.737\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 875/3000 [1:11:10<2:52:46, 4.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:00:53.143\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 876/3000 [1:11:15<2:55:35, 4.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:00:58.295\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 877/3000 [1:11:21<3:00:11, 5.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:01:03.695\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 878/3000 [1:11:26<3:08:02, 5.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", 
"\u001b[32m2026-01-05 11:01:09.536\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 879/3000 [1:11:31<3:02:36, 5.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:01:14.349\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 880/3000 [1:11:36<3:00:54, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:01:19.362\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 881/3000 [1:11:41<3:00:49, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:01:24.483\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 882/3000 [1:11:46<2:54:01, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:01:28.969\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 883/3000 [1:11:50<2:46:55, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:01:33.235\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 29%|██▉ | 884/3000 [1:11:55<2:49:04, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:01:38.177\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 885/3000 [1:12:00<2:51:14, 4.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", 
"\u001b[32m2026-01-05 11:01:43.183\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 886/3000 [1:12:04<2:45:35, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:01:47.515\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 887/3000 [1:12:10<2:52:34, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:01:52.883\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 888/3000 [1:12:13<2:40:18, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:01:56.629\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 889/3000 [1:12:18<2:43:52, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:02:01.528\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 890/3000 [1:12:22<2:37:05, 4.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:02:05.551\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 891/3000 [1:12:27<2:34:49, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:02:09.809\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 892/3000 [1:12:31<2:33:26, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", 
"\u001b[32m2026-01-05 11:02:14.090\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 893/3000 [1:12:36<2:40:58, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:02:19.180\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 894/3000 [1:12:40<2:35:04, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:02:23.210\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 895/3000 [1:12:45<2:39:45, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:02:28.080\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 896/3000 [1:12:49<2:35:07, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:02:32.201\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 897/3000 [1:12:54<2:35:49, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:02:36.698\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 898/3000 [1:12:58<2:34:36, 4.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:02:41.035\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|██▉ | 899/3000 [1:13:02<2:32:49, 4.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", 
"\u001b[32m2026-01-05 11:02:45.286\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 900/3000 [1:13:07<2:38:23, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:02:50.187\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 901/3000 [1:13:11<2:31:34, 4.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:02:54.070\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 902/3000 [1:13:15<2:30:18, 4.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:02:58.289\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 903/3000 [1:13:21<2:43:31, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:03:03.855\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 904/3000 [1:13:26<2:45:26, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:03:08.724\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 905/3000 [1:13:30<2:39:00, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:03:12.853\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 906/3000 [1:13:34<2:36:59, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", 
"\u001b[32m2026-01-05 11:03:17.221\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 907/3000 [1:13:39<2:38:24, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:03:21.863\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 908/3000 [1:13:43<2:40:58, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:03:26.657\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 909/3000 [1:13:48<2:44:01, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:03:31.572\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 910/3000 [1:13:53<2:47:59, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:03:36.665\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 911/3000 [1:13:58<2:46:25, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:03:41.346\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 912/3000 [1:14:03<2:48:51, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:03:46.367\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 913/3000 [1:14:08<2:52:40, 4.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", 
"\u001b[32m2026-01-05 11:03:51.592\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 914/3000 [1:14:13<2:52:21, 4.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:03:56.535\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 30%|███ | 915/3000 [1:14:18<2:46:18, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:04:00.920\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 916/3000 [1:14:22<2:35:44, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:04:04.700\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 917/3000 [1:14:26<2:33:14, 4.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:04:08.951\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 918/3000 [1:14:31<2:45:50, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:04:14.582\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 919/3000 [1:14:36<2:45:07, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:04:19.300\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 920/3000 [1:14:41<2:42:30, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", 
"\u001b[32m2026-01-05 11:04:23.818\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 921/3000 [1:14:45<2:40:37, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:04:28.331\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 922/3000 [1:14:50<2:38:18, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:04:32.751\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 923/3000 [1:14:54<2:37:59, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:04:37.298\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 924/3000 [1:14:58<2:33:17, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:04:41.417\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 925/3000 [1:15:03<2:34:48, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:04:46.001\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 926/3000 [1:15:08<2:38:11, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:04:50.812\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 927/3000 [1:15:13<2:43:47, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", 
"\u001b[32m2026-01-05 11:04:55.935\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 928/3000 [1:15:17<2:37:04, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:05:00.035\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 929/3000 [1:15:22<2:45:04, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:05:05.364\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 930/3000 [1:15:27<2:43:01, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:05:09.955\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 931/3000 [1:15:32<2:44:45, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:05:14.856\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 932/3000 [1:15:36<2:38:48, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:05:19.066\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 933/3000 [1:15:40<2:38:03, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:05:23.608\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 934/3000 [1:15:45<2:34:58, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", 
"\u001b[32m2026-01-05 11:05:27.906\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 935/3000 [1:15:49<2:35:22, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:05:32.452\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 936/3000 [1:15:54<2:37:46, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:05:37.206\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███ | 937/3000 [1:15:59<2:40:55, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:05:42.105\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███▏ | 938/3000 [1:16:03<2:36:49, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:05:46.395\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███▏ | 939/3000 [1:16:07<2:33:04, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:05:50.603\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███▏ | 940/3000 [1:16:12<2:30:16, 4.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:05:54.794\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███▏ | 941/3000 [1:16:16<2:29:33, 4.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", 
"\u001b[32m2026-01-05 11:05:59.108\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███▏ | 942/3000 [1:16:20<2:28:14, 4.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:06:03.346\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███▏ | 943/3000 [1:16:24<2:27:15, 4.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:06:07.578\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 31%|███▏ | 944/3000 [1:16:29<2:28:06, 4.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:06:11.964\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: 
['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 945/3000 [1:16:33<2:28:17, 4.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:06:16.311\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 946/3000 [1:16:37<2:28:17, 4.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:06:20.647\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 947/3000 [1:16:42<2:33:36, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:06:25.504\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 948/3000 [1:16:47<2:33:58, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 1.0}\n", "\u001b[32m2026-01-05 11:06:30.036\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 949/3000 [1:16:52<2:36:56, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:06:34.835\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 950/3000 [1:16:56<2:37:00, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:06:39.440\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 951/3000 [1:17:01<2:36:37, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:06:44.006\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: 
['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 952/3000 [1:17:05<2:34:59, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:06:48.440\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 953/3000 [1:17:10<2:42:03, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:06:53.680\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 954/3000 [1:17:16<2:45:16, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:06:58.752\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 955/3000 [1:17:21<2:48:35, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 1.0}\n", "\u001b[32m2026-01-05 11:07:03.930\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 956/3000 [1:17:25<2:44:57, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:07:08.529\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 957/3000 [1:17:30<2:46:55, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:07:13.572\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 958/3000 [1:17:35<2:44:08, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:07:18.209\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: 
['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 959/3000 [1:17:39<2:37:28, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:07:22.387\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 960/3000 [1:17:44<2:37:39, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:07:27.042\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 961/3000 [1:17:49<2:38:34, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:07:31.777\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 962/3000 [1:17:53<2:39:31, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 0.0}\n", "\u001b[32m2026-01-05 11:07:36.544\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 963/3000 [1:17:58<2:41:05, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:07:41.402\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 964/3000 [1:18:03<2:44:54, 4.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:07:46.528\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 965/3000 [1:18:08<2:46:34, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:07:51.560\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: 
['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 966/3000 [1:18:14<2:56:03, 5.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:07:57.413\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 967/3000 [1:18:18<2:45:41, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:08:01.595\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 968/3000 [1:18:24<2:51:02, 5.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:08:07.020\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 969/3000 [1:18:30<3:01:26, 5.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 1.0}\n", "\u001b[32m2026-01-05 11:08:13.102\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 970/3000 [1:18:35<3:01:24, 5.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:08:18.467\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 971/3000 [1:18:40<2:56:35, 5.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:08:23.363\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 972/3000 [1:18:45<2:50:20, 5.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:08:27.978\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: 
['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 973/3000 [1:18:50<2:50:03, 5.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:08:32.998\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▏ | 974/3000 [1:18:55<2:49:57, 5.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:08:38.030\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 32%|███▎ | 975/3000 [1:19:00<2:54:24, 5.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:08:43.512\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 976/3000 [1:19:05<2:49:23, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 1.0}\n", "\u001b[32m2026-01-05 11:08:48.191\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 977/3000 [1:19:10<2:51:56, 5.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:08:53.474\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 978/3000 [1:19:14<2:41:48, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:08:57.579\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 979/3000 [1:19:20<2:46:17, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:09:02.832\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: 
['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 980/3000 [1:19:24<2:42:37, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:09:07.414\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 981/3000 [1:19:29<2:38:42, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:09:11.864\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 982/3000 [1:19:33<2:35:23, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:09:16.260\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 983/3000 [1:19:38<2:35:48, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 1.0}\n", "\u001b[32m2026-01-05 11:09:20.928\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 984/3000 [1:19:42<2:35:08, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:09:25.506\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 985/3000 [1:19:47<2:36:06, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:09:30.227\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 986/3000 [1:19:52<2:42:33, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:09:35.524\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: 
['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 987/3000 [1:19:57<2:38:28, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:09:39.969\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 988/3000 [1:20:01<2:34:14, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:09:44.279\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 989/3000 [1:20:06<2:32:43, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:09:48.735\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 990/3000 [1:20:10<2:34:18, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 1.0}\n", "\u001b[32m2026-01-05 11:09:53.457\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 991/3000 [1:20:15<2:33:07, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:09:57.953\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 992/3000 [1:20:19<2:34:12, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:10:02.642\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 993/3000 [1:20:24<2:36:13, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:10:07.458\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: 
['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 994/3000 [1:20:29<2:36:58, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:10:12.211\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 995/3000 [1:20:35<2:44:53, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:10:17.703\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 996/3000 [1:20:40<2:52:46, 5.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:10:23.433\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 997/3000 [1:20:45<2:49:22, 5.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 0.0}\n", "\u001b[32m2026-01-05 11:10:28.275\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 998/3000 [1:20:50<2:46:01, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:10:33.023\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 999/3000 [1:20:54<2:41:07, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:10:37.517\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 1000/3000 [1:20:59<2:39:01, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:10:42.147\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: 
['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 1001/3000 [1:21:04<2:44:16, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:10:47.450\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 1002/3000 [1:21:10<2:49:29, 5.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:10:52.912\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 1003/3000 [1:21:15<2:53:55, 5.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:10:58.454\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 33%|███▎ | 1004/3000 [1:21:20<2:46:42, 5.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 
0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:11:02.965\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▎ | 1005/3000 [1:21:25<2:46:49, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:11:07.997\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▎ | 1006/3000 [1:21:31<2:55:15, 5.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:11:13.867\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▎ | 1007/3000 [1:21:35<2:45:48, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:11:18.202\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated 
nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▎ | 1008/3000 [1:21:41<2:53:18, 5.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:11:23.955\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▎ | 1009/3000 [1:21:46<2:49:20, 5.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:11:28.785\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▎ | 1010/3000 [1:21:51<2:57:08, 5.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:11:34.681\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▎ | 1011/3000 [1:21:57<2:54:46, 5.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 
'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:11:39.793\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▎ | 1012/3000 [1:22:01<2:48:52, 5.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:11:44.481\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 1013/3000 [1:22:07<2:50:28, 5.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:11:49.747\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 1014/3000 [1:22:12<2:55:46, 5.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:11:55.436\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 1015/3000 [1:22:17<2:52:57, 5.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:12:00.473\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 1016/3000 [1:22:22<2:49:21, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:12:05.346\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 1017/3000 [1:22:27<2:45:37, 5.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:12:10.099\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 1018/3000 [1:22:32<2:48:23, 5.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics 
{'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:12:15.400\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 1019/3000 [1:22:37<2:46:19, 5.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:12:20.296\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 1020/3000 [1:22:43<2:50:07, 5.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:12:25.726\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 1021/3000 [1:22:48<2:54:13, 5.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:12:31.304\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow 
contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 1022/3000 [1:22:53<2:51:06, 5.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:12:36.280\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 1023/3000 [1:22:59<2:59:33, 5.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:12:42.335\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 1024/3000 [1:23:04<2:50:23, 5.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:12:46.866\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 1025/3000 [1:23:09<2:55:23, 5.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:12:52.555\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 1026/3000 [1:23:15<2:54:35, 5.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:12:57.811\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 1027/3000 [1:23:19<2:49:16, 5.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:13:02.588\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 1028/3000 [1:23:24<2:44:57, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:13:07.306\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe 
workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 1029/3000 [1:23:30<2:50:37, 5.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:13:12.908\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 1030/3000 [1:23:35<2:52:36, 5.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:13:18.312\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 1031/3000 [1:23:40<2:50:02, 5.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:13:23.318\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 1032/3000 [1:23:45<2:48:48, 5.15s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:13:28.383\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 1033/3000 [1:23:51<2:50:48, 5.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:13:33.741\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 1034/3000 [1:23:56<2:54:39, 5.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:13:39.353\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 34%|███▍ | 1035/3000 [1:24:01<2:44:45, 5.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:13:43.685\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 1036/3000 [1:24:06<2:44:39, 5.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:13:48.713\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 1037/3000 [1:24:11<2:50:14, 5.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:13:54.321\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 1038/3000 [1:24:17<2:55:09, 5.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:14:00.035\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 1039/3000 [1:24:21<2:47:23, 5.12s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:14:04.608\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 1040/3000 [1:24:26<2:46:37, 5.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:14:09.660\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 1041/3000 [1:24:31<2:40:35, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:14:14.154\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 1042/3000 [1:24:36<2:42:46, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:14:19.304\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 1043/3000 [1:24:41<2:36:59, 4.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:14:23.709\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 1044/3000 [1:24:45<2:30:48, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:14:27.899\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 1045/3000 [1:24:49<2:30:49, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:14:32.535\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 1046/3000 [1:24:54<2:30:40, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:14:37.155\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 1047/3000 [1:24:59<2:30:58, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:14:41.822\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 1048/3000 [1:25:03<2:29:53, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:14:46.357\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▍ | 1049/3000 [1:25:09<2:45:28, 5.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:14:52.569\u001b[0m | \u001b[33m\u001b[1mWARNING 
\u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 1050/3000 [1:25:15<2:48:52, 5.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:14:58.016\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 1051/3000 [1:25:20<2:44:31, 5.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:15:02.774\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 1052/3000 [1:25:24<2:42:05, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:15:07.598\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 1053/3000 [1:25:31<2:53:08, 5.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:15:13.733\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 1054/3000 [1:25:36<2:49:36, 5.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:15:18.715\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 1055/3000 [1:25:41<2:54:16, 5.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:15:24.433\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 1056/3000 [1:25:46<2:50:35, 5.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:15:29.440\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 1057/3000 [1:25:51<2:46:30, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:15:34.294\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 1058/3000 [1:25:57<2:49:02, 5.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:15:39.706\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 1059/3000 [1:26:02<2:55:13, 5.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:15:45.574\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 1060/3000 [1:26:07<2:51:41, 5.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:15:50.635\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 1061/3000 [1:26:13<2:54:20, 5.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:15:56.229\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 1062/3000 [1:26:19<3:03:55, 5.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:16:02.621\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 1063/3000 [1:26:25<3:03:11, 5.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 
11:16:08.249\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 35%|███▌ | 1064/3000 [1:26:30<2:56:16, 5.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:16:13.219\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 1065/3000 [1:26:35<2:50:55, 5.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:16:18.139\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 1066/3000 [1:26:41<2:53:13, 5.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:16:23.686\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 1067/3000 [1:26:46<2:49:55, 5.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:16:28.728\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 1068/3000 [1:26:51<2:48:20, 5.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:16:33.847\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 1069/3000 [1:26:55<2:42:18, 5.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:16:38.460\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 1070/3000 [1:27:02<2:56:47, 5.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", 
"\u001b[32m2026-01-05 11:16:45.012\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 1071/3000 [1:27:07<2:49:23, 5.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:16:49.750\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 1072/3000 [1:27:12<2:52:08, 5.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:16:55.313\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 1073/3000 [1:27:17<2:46:45, 5.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:17:00.121\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: 
['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 1074/3000 [1:27:23<2:52:15, 5.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:17:05.894\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 1075/3000 [1:27:28<2:48:46, 5.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:17:10.906\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 1076/3000 [1:27:33<2:48:45, 5.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:17:16.174\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 1077/3000 [1:27:39<2:54:14, 5.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 
0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:17:22.017\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 1078/3000 [1:27:43<2:44:56, 5.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:17:26.496\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 1079/3000 [1:27:49<2:49:44, 5.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:17:32.152\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 1080/3000 [1:27:54<2:43:42, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:17:36.836\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated 
nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 1081/3000 [1:27:58<2:41:03, 5.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:17:41.684\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 1082/3000 [1:28:03<2:38:42, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:17:46.484\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 1083/3000 [1:28:09<2:43:26, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:17:51.950\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 1084/3000 [1:28:14<2:44:18, 5.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 
'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:17:57.165\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 1085/3000 [1:28:19<2:46:14, 5.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:18:02.521\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 1086/3000 [1:28:24<2:38:38, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:18:06.945\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▌ | 1087/3000 [1:28:29<2:39:09, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:18:11.980\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▋ | 1088/3000 [1:28:34<2:36:54, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:18:16.746\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▋ | 1089/3000 [1:28:39<2:41:49, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:18:22.193\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▋ | 1090/3000 [1:28:44<2:37:18, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:18:26.809\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▋ | 1091/3000 [1:28:48<2:36:11, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics 
{'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:18:31.643\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▋ | 1092/3000 [1:28:54<2:41:15, 5.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:18:37.092\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▋ | 1093/3000 [1:29:00<2:46:32, 5.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:18:42.725\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▋ | 1094/3000 [1:29:05<2:47:36, 5.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:18:48.086\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow 
contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 36%|███▋ | 1095/3000 [1:29:09<2:39:44, 5.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:18:52.546\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 1096/3000 [1:29:15<2:45:35, 5.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:18:58.201\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 1097/3000 [1:29:19<2:37:14, 4.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:19:02.550\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 1098/3000 [1:29:25<2:40:19, 5.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:19:07.841\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 1099/3000 [1:31:00<16:56:21, 32.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:20:42.969\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 1100/3000 [1:31:06<12:49:31, 24.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:20:49.121\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 1101/3000 [1:31:11<9:48:30, 18.59s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:20:54.400\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 1102/3000 [1:31:17<7:42:11, 14.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:20:59.716\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 1103/3000 [1:31:21<6:08:04, 11.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:21:04.430\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 1104/3000 [1:31:26<5:01:41, 9.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:21:09.090\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 1105/3000 [1:31:31<4:22:07, 8.30s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:21:14.479\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 1106/3000 [1:31:38<4:05:12, 7.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:21:21.006\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 1107/3000 [1:31:43<3:40:37, 6.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:21:26.191\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 1108/3000 [1:31:48<3:21:58, 6.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:21:31.225\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 1109/3000 [1:31:53<3:08:36, 5.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:21:36.227\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 1110/3000 [1:31:58<2:56:10, 5.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:21:40.907\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 1111/3000 [1:32:03<2:49:10, 5.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:21:45.768\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 1112/3000 [1:32:08<2:48:12, 5.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:21:51.049\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 1113/3000 [1:32:13<2:45:23, 5.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:21:56.104\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 1114/3000 [1:32:19<2:50:38, 5.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:22:01.931\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 1115/3000 [1:32:24<2:45:17, 5.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:22:06.801\u001b[0m | \u001b[33m\u001b[1mWARNING 
\u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 1116/3000 [1:32:32<3:11:18, 6.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:22:14.833\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 1117/3000 [1:32:36<2:57:06, 5.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:22:19.429\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 1118/3000 [1:32:41<2:52:43, 5.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:22:24.617\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 1119/3000 [1:32:46<2:47:05, 5.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:22:29.534\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 1120/3000 [1:32:51<2:42:30, 5.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:22:34.386\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 1121/3000 [1:32:56<2:36:39, 5.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:22:38.959\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 1122/3000 [1:33:01<2:40:10, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:22:44.345\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 1123/3000 [1:33:07<2:44:01, 5.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:22:49.881\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 37%|███▋ | 1124/3000 [1:33:12<2:41:25, 5.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:22:54.856\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 1125/3000 [1:33:16<2:34:57, 4.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:22:59.339\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 1126/3000 [1:33:21<2:35:19, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:23:04.346\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 1127/3000 [1:33:26<2:32:26, 4.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:23:09.019\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 1128/3000 [1:33:31<2:32:12, 4.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:23:13.887\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 1129/3000 [1:33:36<2:32:47, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 
11:23:18.837\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 1130/3000 [1:33:40<2:31:50, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:23:23.643\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 1131/3000 [1:33:46<2:34:28, 4.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:23:28.806\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 1132/3000 [1:33:50<2:33:07, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:23:33.630\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 1133/3000 [1:33:57<2:46:00, 5.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:23:39.936\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 1134/3000 [1:34:01<2:33:50, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:23:43.977\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 1135/3000 [1:34:06<2:33:55, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:23:48.942\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 1136/3000 [1:34:10<2:30:36, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", 
"\u001b[32m2026-01-05 11:23:53.546\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 1137/3000 [1:34:16<2:35:47, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:23:58.959\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 1138/3000 [1:34:21<2:41:19, 5.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:24:04.579\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 1139/3000 [1:34:26<2:39:46, 5.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:24:09.620\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: 
['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 1140/3000 [1:34:31<2:36:27, 5.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:24:14.425\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 1141/3000 [1:34:36<2:35:59, 5.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:24:19.431\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 1142/3000 [1:34:41<2:29:58, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:24:23.827\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 1143/3000 [1:34:45<2:29:55, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 
0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:24:28.673\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 1144/3000 [1:34:50<2:29:47, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:24:33.512\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 1145/3000 [1:34:56<2:39:40, 5.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:24:39.428\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 1146/3000 [1:35:02<2:47:08, 5.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:24:45.407\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated 
nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 1147/3000 [1:35:07<2:38:52, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:24:49.935\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 1148/3000 [1:35:11<2:32:02, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:24:54.350\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 1149/3000 [1:35:17<2:41:16, 5.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:25:00.283\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 1150/3000 [1:35:21<2:31:30, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 
'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:25:04.464\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 1151/3000 [1:35:27<2:39:18, 5.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:25:10.229\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 1152/3000 [1:35:32<2:33:35, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:25:14.791\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 1153/3000 [1:35:37<2:37:58, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:25:20.261\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 1154/3000 [1:35:42<2:32:27, 4.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:25:24.804\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 38%|███▊ | 1155/3000 [1:35:46<2:29:33, 4.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:25:29.455\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▊ | 1156/3000 [1:35:52<2:35:02, 5.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:25:34.922\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▊ | 1157/3000 [1:35:56<2:26:16, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics 
{'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:25:39.025\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▊ | 1158/3000 [1:36:01<2:30:08, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:25:44.215\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▊ | 1159/3000 [1:36:06<2:28:07, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:25:48.896\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▊ | 1160/3000 [1:36:10<2:24:55, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:25:53.384\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow 
contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▊ | 1161/3000 [1:36:15<2:28:40, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:25:58.526\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▊ | 1162/3000 [1:36:20<2:30:12, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:26:03.553\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 1163/3000 [1:36:25<2:31:49, 4.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:26:08.640\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 1164/3000 [1:36:30<2:29:17, 4.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:26:13.332\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 1165/3000 [1:36:35<2:28:49, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:26:18.169\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 1166/3000 [1:36:40<2:31:10, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:26:23.300\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 1167/3000 [1:36:45<2:27:16, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:26:27.830\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe 
workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 1168/3000 [1:36:49<2:20:18, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:26:31.899\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 1169/3000 [1:36:53<2:17:56, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:26:36.243\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 1170/3000 [1:36:59<2:28:35, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:26:41.936\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 1171/3000 [1:37:03<2:25:29, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:26:46.478\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 1172/3000 [1:37:09<2:31:58, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:26:51.969\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 1173/3000 [1:37:14<2:35:13, 5.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:26:57.322\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 1174/3000 [1:37:19<2:33:51, 5.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:27:02.280\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 1175/3000 [1:37:24<2:33:38, 5.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:27:07.321\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 1176/3000 [1:37:29<2:28:52, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:27:11.859\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 1177/3000 [1:37:34<2:32:22, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:27:17.149\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 1178/3000 [1:37:40<2:41:11, 5.31s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:27:23.141\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 1179/3000 [1:37:45<2:41:29, 5.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:27:28.491\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 1180/3000 [1:37:51<2:41:34, 5.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:27:33.832\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 1181/3000 [1:37:55<2:36:13, 5.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:27:38.580\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 1182/3000 [1:38:01<2:37:21, 5.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:27:43.867\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 1183/3000 [1:38:06<2:38:07, 5.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:27:49.154\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 39%|███▉ | 1184/3000 [1:38:11<2:35:29, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:27:54.094\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": 
"stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 1185/3000 [1:38:15<2:26:18, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:27:58.230\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 1186/3000 [1:38:20<2:26:21, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:28:03.082\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 1187/3000 [1:38:25<2:29:16, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:28:08.254\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 1188/3000 [1:38:31<2:33:53, 5.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:28:13.712\u001b[0m | \u001b[33m\u001b[1mWARNING 
\u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 1189/3000 [1:38:35<2:30:00, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:28:18.388\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 1190/3000 [1:38:41<2:39:11, 5.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:28:24.381\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 1191/3000 [1:38:46<2:38:43, 5.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:28:29.617\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 1192/3000 [1:38:52<2:41:24, 5.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:28:35.188\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 1193/3000 [1:38:57<2:40:01, 5.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:28:40.401\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 1194/3000 [1:39:02<2:33:51, 5.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:28:45.042\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 1195/3000 [1:39:08<2:40:35, 5.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:28:50.908\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 1196/3000 [1:39:12<2:35:07, 5.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:28:55.650\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 1197/3000 [1:39:17<2:29:44, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:29:00.222\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 1198/3000 [1:39:23<2:37:28, 5.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:29:06.072\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] 
}, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|███▉ | 1199/3000 [1:39:29<2:41:05, 5.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:29:11.728\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1200/3000 [1:39:33<2:34:15, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:29:16.345\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1201/3000 [1:39:38<2:34:57, 5.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:29:21.575\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1202/3000 [1:39:44<2:35:40, 5.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 
11:29:26.832\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1203/3000 [1:39:48<2:31:36, 5.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:29:31.584\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1204/3000 [1:39:54<2:34:30, 5.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:29:36.979\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1205/3000 [1:39:59<2:33:53, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:29:42.081\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1206/3000 [1:40:04<2:37:05, 5.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:29:47.592\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1207/3000 [1:40:10<2:38:13, 5.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:29:52.982\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1208/3000 [1:40:14<2:31:37, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:29:57.549\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1209/3000 [1:40:19<2:28:39, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", 
"\u001b[32m2026-01-05 11:30:02.305\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1210/3000 [1:40:24<2:26:42, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:30:07.076\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1211/3000 [1:40:29<2:26:58, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:30:12.034\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1212/3000 [1:40:33<2:24:12, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:30:16.663\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: 
['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1213/3000 [1:40:39<2:27:59, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:30:21.933\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1214/3000 [1:40:44<2:32:33, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:30:27.424\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 40%|████ | 1215/3000 [1:40:50<2:34:26, 5.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:30:32.770\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1216/3000 [1:40:55<2:35:20, 5.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 
0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:30:38.071\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1217/3000 [1:41:00<2:33:28, 5.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:30:43.096\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1218/3000 [1:41:05<2:31:05, 5.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:30:48.002\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1219/3000 [1:41:09<2:23:15, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:30:52.219\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated 
nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1220/3000 [1:41:14<2:28:28, 5.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:30:57.641\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1221/3000 [1:41:20<2:37:01, 5.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:31:03.616\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1222/3000 [1:41:25<2:34:40, 5.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:31:08.658\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1223/3000 [1:41:31<2:36:01, 5.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 
'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:31:14.039\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1224/3000 [1:41:36<2:37:56, 5.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:31:19.532\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1225/3000 [1:41:43<2:47:50, 5.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:31:25.994\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1226/3000 [1:41:48<2:40:02, 5.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:31:30.799\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1227/3000 [1:41:53<2:35:26, 5.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:31:35.704\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1228/3000 [1:41:57<2:30:13, 5.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:31:40.386\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1229/3000 [1:42:02<2:31:24, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:31:45.615\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1230/3000 [1:42:08<2:32:02, 5.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics 
{'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:31:50.826\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1231/3000 [1:42:12<2:24:51, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:31:55.176\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1232/3000 [1:42:17<2:25:35, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:32:00.182\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1233/3000 [1:42:23<2:32:26, 5.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:32:05.908\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow 
contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1234/3000 [1:42:28<2:29:34, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:32:10.769\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1235/3000 [1:42:32<2:26:14, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:32:15.483\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1236/3000 [1:42:37<2:26:23, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:32:20.480\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████ | 1237/3000 [1:42:42<2:23:47, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:32:25.175\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████▏ | 1238/3000 [1:42:47<2:21:17, 4.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:32:29.793\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████▏ | 1239/3000 [1:42:52<2:23:20, 4.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:32:34.847\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████▏ | 1240/3000 [1:42:56<2:21:55, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:32:39.579\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe 
workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████▏ | 1241/3000 [1:43:01<2:20:10, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:32:44.228\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████▏ | 1242/3000 [1:43:07<2:26:55, 5.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:32:49.785\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████▏ | 1243/3000 [1:43:11<2:24:46, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:32:54.565\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 41%|████▏ | 1244/3000 [1:43:17<2:26:49, 5.02s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:32:59.751\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1245/3000 [1:43:21<2:18:57, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:33:03.881\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639904.746268909)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639914.264041655)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639922.695742967)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639900.220062308)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639909.717789626)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639918.65346065)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1639931.723464885)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639941.011999829)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639950.324868357)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639960.182580417)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639936.490291704)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639945.584812066)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639955.512054602)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639964.789586745)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639970.495762974)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639980.90343043)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639991.664885039)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640000.756499956)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639986.634676141)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1639996.250499829)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed 
connector\n", "connections: ['deque([(, 1639975.853339901)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640010.055994116)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640017.755424076)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640026.613152248)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640035.725746807)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640013.71239578)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640022.133092644)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640030.988820034)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640045.006188964)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640053.528401713)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640063.93847617)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640073.933471145)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1640049.712249863)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640058.414352106)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640069.1680898)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640078.270582672)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640082.893268842)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640090.950291438)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640100.654724738)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640110.087221594)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640087.255821939)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640095.723780298)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640105.341963332)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640118.441303378)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640127.47904251)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640137.394569989)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640147.384943423)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", 
"client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640122.796099374)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640131.984396526)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640142.404233839)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640152.135807891)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640157.17492847)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640165.279858184)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640175.212772441)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640184.261660895)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640169.468504307)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1640179.650607757)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640161.374678477)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640194.578355692)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640203.167566626)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640212.49810835)])']\n", 
"connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640222.74051059)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640199.083213736)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640207.590435061)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640217.472755576)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640231.93757551)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640240.553791764)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640250.544667136)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640259.220045102)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640235.817724122)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640245.998422681)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640255.103620831)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640268.884903399)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640278.40189993)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1640287.46531552)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640296.681509456)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640274.519108003)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640283.351144655)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640291.950710536)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640305.938997383)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640315.74460263)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640324.878006157)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640333.512555849)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640320.298527837)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1640329.140325626)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640310.967927716)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640343.066977556)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640351.288611856)])']\n", "connector: \n", "Unclosed 
client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640359.701810511)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640368.585449729)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640347.340312975)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640355.924562543)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640364.036270424)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640377.893660422)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640387.412872944)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640396.511220141)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640406.830073873)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640392.036584324)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640400.887253811)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640382.55633903)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640415.948123671)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1640425.519193447)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640435.239464064)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640444.416985841)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640421.057237112)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640430.645504659)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640439.775281784)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640452.939544359)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640463.276761126)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640473.618237338)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640483.680125351)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640478.986295003)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640458.54118147)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640468.888778759)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640492.966340875)])']\n", "connector: \n", "Unclosed 
client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640502.792606197)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640512.734746245)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640523.846965848)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640497.48746722)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640508.091814536)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640517.28626717)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640532.908308858)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640541.505264044)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640550.512460358)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640559.353155139)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640537.180459267)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640546.008998311)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640555.239152571)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1640567.078634562)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640576.805604694)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640586.487710462)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640595.923214915)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640571.887369599)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640581.888278328)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640591.638875808)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640605.583347356)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640615.584955232)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640627.774946621)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640639.835617196)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640609.917804176)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1640620.942546128)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640634.282299771)])']\n", "connector: \n", "Unclosed 
client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640648.587216927)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640657.659144697)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640666.998993011)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640677.270596082)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640662.79482693)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640672.167074756)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640653.313827993)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640687.501582024)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640696.708347683)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640705.564260475)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640714.116116327)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640701.639560539)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640709.567375272)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1640692.00437379)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640723.5572759)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640732.764314062)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640743.758921901)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640754.237657621)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640727.523254893)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640738.505136311)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640749.181691932)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640762.873647251)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640772.782394522)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640781.887262418)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640790.681646415)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640768.081059015)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 
1640777.376302494)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640786.110987574)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640802.61458766)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640812.263951477)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640821.685428545)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640831.382969201)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640807.237326682)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640817.002351149)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640826.8361303)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", 
"client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1639926.66944088)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640005.565450453)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640040.57221652)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640114.358266853)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640189.166455379)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640227.079930476)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640264.337241881)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640301.682822462)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640337.932831095)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640373.329926185)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640449.110193556)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640528.194268745)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640563.313358769)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640600.758214114)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640644.023914977)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640718.900352801)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640796.041357378)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640836.423458606)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", 
"client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1640411.134697221)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640488.269899197)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640681.78036078)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640758.931593972)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640841.874504097)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640850.419856195)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640858.995402749)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640869.133474118)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640854.295107949)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640862.775641491)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640873.795953891)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640845.631473409)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640878.092430646)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640883.472549051)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640887.462737539)])']\n", "connector: \n", 
"Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640892.029678286)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640901.2888798)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640905.569338949)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640911.588272053)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640919.585714544)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640924.261130683)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640896.960381479)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640933.261714795)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640937.985075693)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640942.408757878)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640951.057096342)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640956.066616847)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640946.668561823)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed 
connector\n", "connections: ['deque([(, 1640928.818666618)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640960.714558007)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640964.797644856)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640974.224488661)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640980.626631885)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640984.861173745)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640989.178594923)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1640998.758750532)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641003.935133517)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640969.64715839)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1640994.125533949)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641013.723287533)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641020.782042023)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641024.829191798)])']\n", "connector: \n", "Unclosed client 
session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641033.638264786)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641028.875645249)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641038.204875189)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641008.494342999)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641043.34248161)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641047.924955748)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641062.838736893)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641067.051674785)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641076.788048007)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641085.766358656)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641092.100388149)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641080.997672745)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641071.428069594)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641097.302607153)])']\n", 
"connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641105.114508072)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641114.145512388)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641123.077667474)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641127.317086012)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641101.293109242)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641109.61727417)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641118.556245764)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641132.814173477)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641142.189832538)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641153.232451863)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641164.369100706)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641168.913351851)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", 
"connections: ['deque([(, 1641137.954121082)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641147.979184666)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641159.637315008)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641173.643701707)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641182.536543835)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641191.641484986)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641200.654114374)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641205.213838921)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641177.862814313)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641187.484584245)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641196.064592405)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641210.648761829)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641219.959530556)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641229.461203302)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641238.42850974)])']\n", "connector: \n", 
"Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641243.730649716)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641215.533591417)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641224.805184633)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1641233.753301654)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641248.448815506)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641257.942026895)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641266.327500851)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641276.100228023)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641253.298291938)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641262.497679165)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641271.090338516)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641280.890465733)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641285.499963974)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", 
"Unclosed connector\n", "connections: ['deque([(, 1641294.662442452)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641304.238518772)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641313.850594974)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641318.686124298)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641289.995773608)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641299.26010474)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641309.162511097)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641323.747145047)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641331.857294977)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641341.286845474)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641349.931594939)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641354.353578366)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641327.695325123)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 
1641336.116449898)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641345.596230793)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641359.306946411)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641368.926986006)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641378.716340589)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1641388.203117069)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641393.012265375)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641364.165406087)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641373.964824164)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641382.859943468)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641397.363393507)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641405.579830489)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641415.113095445)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641424.438038973)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed 
connector\n", "connections: ['deque([(, 1641428.74098483)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641401.837187963)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641420.227431913)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641410.718864546)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641433.90982557)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641442.350112895)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641451.467886304)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641460.080260012)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641464.818747768)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641438.254692267)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641446.855966615)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641455.419428325)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641469.580937907)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641479.033493566)])']\n", "connector: \n", "Unclosed client session\n", 
"client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641489.869046191)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641498.955507703)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641503.746980042)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641474.321790505)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641484.460802424)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1641494.766347506)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641508.76578775)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641518.552661449)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641529.505357801)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641543.445861287)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641547.832085398)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641513.615144329)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641524.483865813)])']\n", "connector: \n", "Unclosed connector\n", 
"connections: ['deque([(, 1641538.979780147)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641551.909485032)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641561.591125549)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641570.094573744)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641578.8805473)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641583.830961102)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641557.443468975)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641565.970904044)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641574.465831191)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641588.907805268)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641598.205954195)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641606.508058771)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641615.537388986)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641620.11641495)])']\n", "connector: \n", "Unclosed client session\n", "client_session: 
\n", "Unclosed connector\n", "connections: ['deque([(, 1641594.261652029)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641602.799267587)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641610.545829869)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641624.15548889)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641632.279875552)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641642.737845141)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1641651.314842184)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641657.119287791)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641628.309805151)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641636.991823017)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641647.111780211)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641662.610736705)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641672.439232902)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1641680.966931861)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641692.113377629)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641696.600074339)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641667.53993603)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641677.047150925)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641687.344759767)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641700.900506797)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641711.070973476)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641721.067598243)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641730.121271094)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641735.407963023)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641705.706724256)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641715.284495683)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641725.827203019)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed 
connector\n", "connections: ['deque([(, 1641740.169063546)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641750.177860225)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641760.073581276)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641770.068529514)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641774.570020654)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641744.50388958)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641754.840141045)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1641764.790363629)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641779.529615037)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641788.294866326)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641797.865053842)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641807.359324491)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1641812.200593858)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641783.760857313)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641792.55652188)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641803.135207763)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641817.066711526)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641825.795996459)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641835.806664388)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641845.460922547)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641849.840335894)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641821.43243658)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641840.802290656)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641830.582871273)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641854.222373778)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641863.784308115)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641873.338451787)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", 
"connections: ['deque([(, 1641882.211486357)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641887.277358967)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641859.607006048)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641868.695448738)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641877.889415624)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641891.624754765)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641901.476239359)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641910.433934826)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1641919.433836295)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641924.212655975)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641896.724879506)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641906.337886312)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641914.546933454)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641928.303206335)])']\n", 
"connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641936.600641111)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641946.307401555)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641955.169428362)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641959.619133374)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641932.376536134)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641941.591147023)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641950.639090986)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641965.133506461)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641974.071803845)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641982.603337217)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641991.248888935)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1641995.492142155)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641969.943314235)])']\n", "connector: 
\n", "Unclosed connector\n", "connections: ['deque([(, 1641978.302713586)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1641986.929695528)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642000.357335055)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642009.828889225)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642019.46543856)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642029.485046735)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642034.404488599)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642005.518031871)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642024.784113522)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1642014.712692536)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642038.657406363)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642049.136378777)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642057.95802775)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642067.443560229)])']\n", "connector: \n", "Unclosed 
client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642071.909262907)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642043.814728164)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642053.515510194)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642063.140735819)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642076.399895239)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642085.425829829)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642094.069451657)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642103.160791413)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642108.124622443)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642080.603579547)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642089.829156217)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642098.648863384)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642113.350909508)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1642122.467771643)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642131.785719268)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642140.870234705)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642145.098645402)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642117.692542798)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642126.973096123)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642136.134763002)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642149.62859338)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642158.67109563)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642168.033832792)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1642177.460159722)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642181.929419041)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642154.331837462)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 
1642163.34432203)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642172.85450243)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642186.795486115)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642197.68076618)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642208.263668813)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642217.821096284)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642223.221736054)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642192.31835492)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642204.045410798)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642212.669063751)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642229.06283129)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642238.888250249)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642248.495119346)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642257.703059155)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client 
session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642262.70980253)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642233.875755248)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642244.009377015)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642252.761423959)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642267.041620852)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642276.155559458)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642285.077089301)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642293.616049014)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642298.706339118)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642281.054629869)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642289.335804319)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1642272.409797138)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642302.73657866)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642311.727482038)])']\n", "connector: \n", "Unclosed client session\n", 
"client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642320.561315043)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642329.713417921)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642333.596040182)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642307.606723682)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642316.224514235)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642324.812535049)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642337.815350254)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642348.250606732)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642356.747792942)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642366.183097242)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642371.098085448)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642343.381855275)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642352.379342471)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642361.389143103)])']\n", "connector: 
\n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642376.191746099)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642385.893655356)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642396.061351264)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642404.226266665)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642408.47705729)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642380.872011012)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642391.118513671)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642400.446342076)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642414.108181613)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642423.344105711)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642432.277112633)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1642440.943569134)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", 
"client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642445.527504261)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642418.826291638)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642427.857933092)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642436.824966142)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642450.338051263)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642459.561419072)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642469.481565013)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642478.592417019)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642483.134982903)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642455.461191825)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642464.890514089)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642474.382439043)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642487.432386563)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642496.732419309)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642505.921801986)])']\n", "connector: \n", 
"Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642514.32004525)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642518.634963752)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642491.978295903)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642501.631526224)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642510.129303725)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642522.872485043)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642531.490785849)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642540.173620836)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642549.562920526)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642554.361113075)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642527.105026319)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642535.83729838)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1642545.030986981)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", 
"Unclosed connector\n", "connections: ['deque([(, 1642558.966466199)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642567.966935667)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642578.278334321)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642588.055398961)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642593.097990154)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642573.2061908)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642583.456917411)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642563.532884249)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642597.735692319)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642606.568253234)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642616.070061092)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642626.054949739)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642631.086757232)])']\n", "connector: \n", "Unclosed 
connector\n", "connections: ['deque([(, 1642601.913384272)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642611.303547117)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642620.928040613)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642636.939218714)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642646.546264785)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642657.993952147)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642667.504543345)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642672.524438756)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642641.12131849)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642652.628936188)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642662.889630989)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642677.556877009)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642687.71748501)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642697.105283728)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", 
"connections: ['deque([(, 1642706.940253174)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642711.390751756)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642683.038014358)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642693.000654543)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642702.358734314)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642715.786042727)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642725.032154272)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642735.050223567)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642743.805662513)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642748.261808765)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642720.455021237)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642729.753231322)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642739.495680203)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642752.983528578)])']\n", "connector: \n", "Unclosed client session\n", 
"client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642762.168577484)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642771.737239636)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642782.959119214)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642787.801472473)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642757.479083555)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642777.229665383)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642766.98464773)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642792.549524546)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642801.673545087)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642812.438047742)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642822.491443594)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642827.52325555)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642797.043870889)])']\n", "connector: \n", "Unclosed connector\n", "connections: 
['deque([(, 1642806.976437549)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1642817.980830077)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642833.393856939)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642843.481216797)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642854.207006338)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642864.00759513)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642869.273214439)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642837.728631751)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642848.311984667)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642859.319980094)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642874.962786377)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642884.872545323)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642894.926189948)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642905.252829185)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", 
"Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642910.830952988)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642879.999615652)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642889.62589154)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642899.822984102)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642915.806874615)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642926.39224511)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642937.336752861)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642946.832981032)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642952.434803994)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642921.861427878)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642932.081465162)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642942.114321773)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642957.838861325)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642967.909703094)])']\n", "connector: \n", "Unclosed client 
session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642978.879366105)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1642988.239927655)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642993.847240072)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642962.844657972)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1642983.211542024)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642973.267771733)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1642999.561659276)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643009.186606981)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643018.830339305)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643027.425511367)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643032.061219968)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643004.134585075)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643023.235980302)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed 
connector\n", "connections: ['deque([(, 1643013.680021632)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643036.681503584)])']\n", "connector: \n", "Evaluating workflow: 42%|████▏ | 1246/3000 [1:43:27<2:34:55, 5.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:33:10.461\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1247/3000 [1:43:33<2:34:40, 5.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:33:15.743\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1248/3000 [1:43:38<2:34:31, 5.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:33:21.030\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1249/3000 
[1:43:43<2:30:04, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:33:25.824\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1250/3000 [1:43:48<2:29:07, 5.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:33:30.868\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1251/3000 [1:43:52<2:24:17, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:33:35.439\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1252/3000 [1:43:57<2:24:41, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:33:40.442\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1253/3000 [1:44:03<2:27:14, 5.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:33:45.712\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1254/3000 [1:44:08<2:32:31, 5.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:33:51.382\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1255/3000 [1:44:13<2:26:37, 5.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:33:55.958\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1256/3000 [1:44:19<2:33:41, 5.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:34:01.820\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1257/3000 [1:44:24<2:30:27, 5.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:34:06.746\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1258/3000 [1:44:29<2:31:52, 5.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:34:12.098\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1259/3000 [1:44:35<2:38:21, 5.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:34:18.084\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1260/3000 [1:44:40<2:36:34, 5.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:34:23.346\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1261/3000 [1:44:45<2:28:02, 5.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:34:27.775\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1262/3000 [1:44:49<2:26:07, 5.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:34:32.671\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1263/3000 [1:44:54<2:19:27, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:34:36.958\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1264/3000 [1:44:59<2:24:39, 5.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:34:42.384\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1265/3000 [1:45:04<2:19:47, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:34:46.832\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1266/3000 [1:45:08<2:17:58, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
0.0}\n", "\u001b[32m2026-01-05 11:34:51.466\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1267/3000 [1:45:13<2:19:24, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:34:56.415\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1268/3000 [1:45:18<2:19:31, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:35:01.264\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1269/3000 [1:45:23<2:24:08, 5.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:35:06.640\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: 
['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1270/3000 [1:45:29<2:28:56, 5.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:35:12.201\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1271/3000 [1:45:34<2:26:39, 5.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:35:17.112\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1272/3000 [1:45:38<2:21:31, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:35:21.617\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1273/3000 [1:45:44<2:27:31, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 
'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:35:27.235\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▏ | 1274/3000 [1:45:51<2:42:34, 5.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:35:34.114\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 42%|████▎ | 1275/3000 [1:45:56<2:35:31, 5.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:35:38.960\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1276/3000 [1:46:01<2:30:24, 5.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:35:43.787\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1277/3000 [1:46:06<2:32:24, 5.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:35:49.263\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1278/3000 [1:46:11<2:30:54, 5.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:35:54.406\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1279/3000 [1:46:16<2:23:42, 5.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:35:58.837\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1280/3000 [1:46:20<2:20:30, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:36:03.486\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1281/3000 [1:46:25<2:20:08, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:36:08.353\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1282/3000 [1:46:30<2:20:03, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:36:13.245\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1283/3000 [1:46:35<2:22:56, 5.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:36:18.482\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe 
workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1284/3000 [1:46:41<2:29:31, 5.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:36:24.253\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1285/3000 [1:46:46<2:26:18, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:36:29.117\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1286/3000 [1:46:52<2:31:12, 5.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:36:34.817\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1287/3000 [1:46:56<2:26:10, 5.12s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:36:39.533\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1288/3000 [1:47:01<2:26:11, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:36:44.664\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1289/3000 [1:47:06<2:19:08, 4.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:36:48.974\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1290/3000 [1:47:12<2:26:52, 5.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:36:54.767\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1291/3000 [1:47:16<2:21:42, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:36:59.326\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1292/3000 [1:47:22<2:27:32, 5.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:37:04.995\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1293/3000 [1:47:27<2:23:22, 5.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:37:09.699\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1294/3000 [1:47:31<2:20:14, 4.93s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:37:14.381\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1295/3000 [1:47:37<2:25:54, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:37:19.989\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1296/3000 [1:47:41<2:16:58, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:37:24.085\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1297/3000 [1:47:46<2:16:31, 4.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:37:28.864\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1298/3000 [1:47:50<2:12:17, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:37:33.186\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1299/3000 [1:47:55<2:13:38, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:37:38.018\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1300/3000 [1:47:59<2:11:57, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:37:42.544\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1301/3000 [1:48:05<2:17:09, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:37:47.823\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1302/3000 [1:48:09<2:14:16, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:37:52.337\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1303/3000 [1:48:16<2:29:25, 5.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:37:58.875\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 43%|████▎ | 1304/3000 [1:48:20<2:24:11, 5.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:38:03.553\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▎ | 1305/3000 [1:48:25<2:19:39, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:38:08.129\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▎ | 1306/3000 [1:48:30<2:22:43, 5.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:38:13.444\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▎ | 1307/3000 [1:48:35<2:20:54, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:38:18.294\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▎ | 1308/3000 [1:48:41<2:26:08, 5.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:38:23.916\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▎ | 1309/3000 [1:48:46<2:23:25, 5.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:38:28.788\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▎ | 1310/3000 [1:48:50<2:20:00, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:38:33.483\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▎ | 1311/3000 [1:48:56<2:26:00, 5.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
1.0}\n", "\u001b[32m2026-01-05 11:38:39.174\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▎ | 1312/3000 [1:49:01<2:21:15, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:38:43.809\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1313/3000 [1:49:06<2:20:36, 5.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:38:48.763\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1314/3000 [1:49:11<2:19:58, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:38:53.698\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: 
['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1315/3000 [1:49:15<2:18:34, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:38:58.523\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1316/3000 [1:49:20<2:16:06, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:39:03.174\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1317/3000 [1:49:25<2:14:41, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:39:07.866\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1318/3000 [1:49:29<2:13:54, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 
'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:39:12.584\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1319/3000 [1:49:34<2:10:26, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:39:16.957\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1320/3000 [1:49:39<2:15:44, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:39:22.254\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1321/3000 [1:49:44<2:17:29, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:39:27.319\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1322/3000 [1:49:49<2:19:16, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:39:32.455\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1323/3000 [1:49:54<2:17:09, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:39:37.193\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1324/3000 [1:49:59<2:16:41, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:39:42.053\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1325/3000 [1:50:04<2:14:37, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:39:46.711\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1326/3000 [1:50:08<2:11:54, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:39:51.217\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1327/3000 [1:50:12<2:09:11, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:39:55.630\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1328/3000 [1:50:17<2:08:57, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:40:00.245\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe 
workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1329/3000 [1:50:22<2:10:57, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:40:05.122\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1330/3000 [1:50:27<2:13:20, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:40:10.119\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1331/3000 [1:50:31<2:10:58, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:40:14.635\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1332/3000 [1:50:36<2:10:06, 4.68s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:40:19.249\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1333/3000 [1:50:41<2:11:19, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:40:24.084\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1334/3000 [1:50:46<2:11:11, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:40:28.804\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 44%|████▍ | 1335/3000 [1:50:52<2:27:30, 5.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:40:35.499\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1336/3000 [1:50:57<2:24:46, 5.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:40:40.498\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1337/3000 [1:51:02<2:18:55, 5.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:40:45.024\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1338/3000 [1:51:06<2:15:45, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:40:49.666\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1339/3000 [1:51:11<2:16:35, 4.93s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:40:54.676\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1340/3000 [1:51:17<2:18:08, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:40:59.806\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1341/3000 [1:51:21<2:14:36, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:41:04.384\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1342/3000 [1:51:26<2:14:37, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:41:09.264\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1343/3000 [1:51:31<2:13:10, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:41:13.971\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1344/3000 [1:51:37<2:24:58, 5.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:41:20.228\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1345/3000 [1:51:42<2:25:21, 5.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:41:25.538\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1346/3000 [1:51:47<2:20:17, 5.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:41:30.206\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1347/3000 [1:51:55<2:40:20, 5.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:41:37.731\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1348/3000 [1:51:59<2:27:23, 5.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:41:41.995\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▍ | 1349/3000 [1:52:04<2:23:33, 5.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:41:46.895\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1350/3000 [1:52:08<2:18:21, 5.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:41:51.491\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1351/3000 [1:52:13<2:11:46, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:41:55.736\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1352/3000 [1:52:17<2:06:51, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:41:59.943\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1353/3000 [1:52:21<2:06:56, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:42:04.582\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1354/3000 [1:52:26<2:07:34, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:42:09.293\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1355/3000 [1:52:31<2:08:50, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:42:14.107\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1356/3000 [1:52:36<2:11:34, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
0.0}\n", "\u001b[32m2026-01-05 11:42:19.147\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1357/3000 [1:52:41<2:12:17, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:42:24.048\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1358/3000 [1:52:45<2:08:10, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:42:28.386\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1359/3000 [1:52:49<2:04:46, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:42:32.665\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: 
['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1360/3000 [1:52:55<2:13:17, 4.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:42:38.275\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1361/3000 [1:52:59<2:08:29, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:42:42.575\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1362/3000 [1:53:04<2:11:33, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:42:47.663\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1363/3000 [1:53:10<2:16:50, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 
'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:42:53.138\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 45%|████▌ | 1364/3000 [1:53:14<2:12:35, 4.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:42:57.643\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1365/3000 [1:53:19<2:13:47, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:43:02.663\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1366/3000 [1:53:24<2:10:41, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:43:07.204\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1367/3000 [1:53:28<2:07:47, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:43:11.657\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1368/3000 [1:53:32<2:02:00, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:43:15.654\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1369/3000 [1:53:37<2:04:59, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:43:20.513\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1370/3000 [1:53:42<2:07:13, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:43:25.396\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1371/3000 [1:53:46<2:03:31, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:43:29.634\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1372/3000 [1:53:51<2:03:24, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:43:34.179\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1373/3000 [1:53:56<2:05:19, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:43:38.971\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe 
workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1374/3000 [1:54:00<2:05:22, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:43:43.610\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1375/3000 [1:54:05<2:05:52, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:43:48.306\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1376/3000 [1:54:10<2:08:13, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:43:53.253\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1377/3000 [1:54:15<2:06:58, 4.69s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:43:57.845\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1378/3000 [1:54:20<2:08:45, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:44:02.770\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1379/3000 [1:54:25<2:10:43, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:44:07.784\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1380/3000 [1:54:29<2:10:31, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:44:12.608\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1381/3000 [1:54:34<2:06:55, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:44:17.008\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1382/3000 [1:54:39<2:13:47, 4.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:44:22.570\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1383/3000 [1:54:45<2:16:20, 5.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:44:27.857\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1384/3000 [1:54:49<2:09:55, 4.82s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:44:32.132\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1385/3000 [1:54:55<2:19:19, 5.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:44:38.130\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1386/3000 [1:54:59<2:09:45, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:44:42.132\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▌ | 1387/3000 [1:55:03<2:06:44, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:44:46.592\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▋ | 1388/3000 [1:55:08<2:07:42, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:44:51.436\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▋ | 1389/3000 [1:55:13<2:05:03, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:44:55.870\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▋ | 1390/3000 [1:55:17<2:01:59, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:45:00.156\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▋ | 1391/3000 [1:55:22<2:02:09, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:45:04.733\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▋ | 1392/3000 [1:55:27<2:09:43, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:45:10.239\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▋ | 1393/3000 [1:55:32<2:11:51, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:45:15.355\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▋ | 1394/3000 [1:55:38<2:17:42, 5.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:45:21.018\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 46%|████▋ | 1395/3000 [1:55:42<2:13:40, 5.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:45:25.670\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1396/3000 [1:55:47<2:08:07, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:45:29.985\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1397/3000 [1:55:52<2:08:04, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:45:34.783\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1398/3000 [1:55:57<2:08:57, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:45:39.697\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1399/3000 [1:56:02<2:12:18, 4.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:45:44.955\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1400/3000 [1:56:07<2:10:48, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:45:49.737\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1401/3000 [1:56:11<2:04:50, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
0.0}\n", "\u001b[32m2026-01-05 11:45:53.905\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1402/3000 [1:56:16<2:11:47, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:45:59.469\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1403/3000 [1:56:22<2:14:33, 5.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:46:04.774\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1404/3000 [1:56:27<2:16:23, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:46:10.070\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: 
['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1405/3000 [1:56:33<2:27:29, 5.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:46:16.601\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1406/3000 [1:56:39<2:29:07, 5.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:46:22.365\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1407/3000 [1:56:44<2:19:21, 5.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:46:26.765\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1408/3000 [1:56:48<2:16:30, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 
'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:46:31.665\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1409/3000 [1:56:53<2:11:57, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:46:36.250\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1410/3000 [1:56:58<2:13:04, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:46:41.377\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1411/3000 [1:57:03<2:14:27, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:46:46.583\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1412/3000 [1:57:08<2:10:37, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:46:51.188\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1413/3000 [1:57:13<2:12:14, 5.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:46:56.338\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1414/3000 [1:57:18<2:09:30, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:47:01.002\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1415/3000 [1:57:23<2:09:24, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:47:05.900\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1416/3000 [1:57:27<2:04:17, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:47:10.164\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1417/3000 [1:57:32<2:04:41, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:47:14.932\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1418/3000 [1:57:37<2:09:47, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:47:20.312\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe 
workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1419/3000 [1:57:42<2:09:07, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:47:25.162\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1420/3000 [1:57:47<2:07:27, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:47:29.862\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1421/3000 [1:57:52<2:07:35, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:47:34.729\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1422/3000 [1:57:56<2:00:49, 4.59s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:47:38.730\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1423/3000 [1:58:00<1:58:38, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:47:43.057\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 47%|████▋ | 1424/3000 [1:58:04<1:57:51, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:47:47.482\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1425/3000 [1:58:09<1:55:35, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:47:51.691\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1426/3000 [1:58:13<1:56:21, 4.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:47:56.200\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1427/3000 [1:58:18<1:57:58, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:48:00.851\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1428/3000 [1:58:22<1:56:25, 4.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:48:05.163\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1429/3000 [1:58:27<2:02:25, 4.68s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:48:10.380\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1430/3000 [1:58:32<2:02:04, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:48:15.022\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1431/3000 [1:58:36<2:01:15, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:48:19.593\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1432/3000 [1:58:41<2:03:43, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:48:24.554\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1433/3000 [1:58:46<2:05:44, 4.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:48:29.557\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1434/3000 [1:58:51<2:01:41, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:48:33.864\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1435/3000 [1:58:56<2:04:56, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:48:38.952\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1436/3000 [1:59:00<2:01:25, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:48:43.303\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1437/3000 [1:59:05<2:04:53, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:48:48.415\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1438/3000 [1:59:10<2:07:10, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:48:53.512\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1439/3000 [1:59:15<2:03:09, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:48:57.891\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1440/3000 [1:59:20<2:05:30, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:49:02.937\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1441/3000 [1:59:25<2:05:47, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:49:07.811\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1442/3000 [1:59:34<2:43:18, 6.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:49:17.479\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1443/3000 [1:59:39<2:30:21, 5.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:49:22.117\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1444/3000 [1:59:45<2:29:34, 5.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:49:27.824\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1445/3000 [1:59:49<2:19:25, 5.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:49:32.298\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1446/3000 [1:59:55<2:19:23, 5.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
1.0}\n", "\u001b[32m2026-01-05 11:49:37.686\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1447/3000 [1:59:59<2:12:46, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:49:42.228\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1448/3000 [2:00:03<2:06:46, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:49:46.595\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1449/3000 [2:00:08<2:02:43, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:49:50.984\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: 
['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1450/3000 [2:00:13<2:06:55, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:49:56.285\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1451/3000 [2:00:18<2:03:22, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:50:00.750\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1452/3000 [2:00:22<2:03:50, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:50:05.599\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1453/3000 [2:00:27<2:01:14, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 
'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:50:10.072\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1454/3000 [2:00:32<2:04:08, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:50:15.161\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 48%|████▊ | 1455/3000 [2:00:37<2:07:02, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:50:20.365\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▊ | 1456/3000 [2:00:42<2:09:45, 5.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:50:25.660\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▊ | 1457/3000 [2:00:47<2:04:06, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:50:29.982\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▊ | 1458/3000 [2:00:52<2:08:53, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:50:35.438\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▊ | 1459/3000 [2:00:57<2:09:15, 5.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:50:40.511\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▊ | 1460/3000 [2:01:02<2:09:28, 5.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:50:45.584\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▊ | 1461/3000 [2:01:07<2:06:39, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:50:50.273\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▊ | 1462/3000 [2:01:12<2:04:56, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:50:54.998\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1463/3000 [2:01:17<2:08:41, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:51:00.371\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe 
workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1464/3000 [2:01:22<2:10:03, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:51:05.583\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1465/3000 [2:01:27<2:07:57, 5.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:51:10.402\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1466/3000 [2:01:32<2:07:25, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:51:15.344\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1467/3000 [2:01:37<2:04:36, 4.88s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:51:19.972\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1468/3000 [2:01:43<2:15:37, 5.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:51:26.299\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1469/3000 [2:01:48<2:15:00, 5.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:51:31.541\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1470/3000 [2:01:53<2:07:15, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:51:35.830\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1471/3000 [2:01:58<2:10:51, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:51:41.303\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1472/3000 [2:02:02<2:04:56, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:51:45.674\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1473/3000 [2:02:08<2:08:34, 5.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:51:51.067\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1474/3000 [2:02:13<2:09:16, 5.08s/it]" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:51:56.221\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1475/3000 [2:02:18<2:06:04, 4.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:52:00.895\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1476/3000 [2:02:24<2:14:34, 5.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:52:06.982\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1477/3000 [2:02:29<2:11:37, 5.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:52:11.904\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1478/3000 [2:02:33<2:07:10, 5.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:52:16.517\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1479/3000 [2:02:38<2:01:48, 4.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:52:20.836\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1480/3000 [2:02:43<2:08:29, 5.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:52:26.530\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1481/3000 [2:02:48<2:06:47, 5.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:52:31.391\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1482/3000 [2:02:53<2:03:36, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:52:35.990\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1483/3000 [2:02:58<2:08:32, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:52:41.538\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 49%|████▉ | 1484/3000 [2:03:03<2:07:29, 5.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:52:46.494\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1485/3000 [2:03:09<2:14:20, 5.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:52:52.455\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1486/3000 [2:03:15<2:15:44, 5.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:52:57.973\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1487/3000 [2:03:20<2:14:49, 5.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:53:03.242\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1488/3000 [2:03:26<2:16:52, 5.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:53:08.872\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1489/3000 [2:03:31<2:14:09, 5.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:53:13.955\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1490/3000 [2:03:36<2:09:36, 5.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:53:18.691\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1491/3000 [2:03:40<2:08:02, 5.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
1.0}\n", "\u001b[32m2026-01-05 11:53:23.646\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1492/3000 [2:03:45<2:03:17, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:53:28.118\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1493/3000 [2:03:49<1:59:42, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:53:32.558\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1494/3000 [2:03:55<2:07:54, 5.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:53:38.424\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: 
['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1495/3000 [2:04:00<2:02:49, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:53:42.856\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1496/3000 [2:04:04<2:01:58, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:53:47.649\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1497/3000 [2:04:10<2:03:34, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:53:52.741\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1498/3000 [2:04:14<1:57:02, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 
'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:53:56.815\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|████▉ | 1499/3000 [2:04:18<1:53:29, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:54:01.028\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1500/3000 [2:04:23<1:54:57, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:54:05.769\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1501/3000 [2:04:27<1:56:03, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:54:10.524\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1502/3000 [2:04:33<2:00:49, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:54:15.817\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1503/3000 [2:04:37<1:59:07, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:54:20.440\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1504/3000 [2:04:43<2:03:34, 4.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:54:25.819\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1505/3000 [2:04:47<1:58:38, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:54:30.126\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1506/3000 [2:04:52<2:02:26, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:54:35.409\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n", "\u001b[32m2026-01-05 11:56:23.458\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mevoagentx.workflow.workflow\u001b[0m:\u001b[36masync_execute\u001b[0m:\u001b[36m104\u001b[0m - \u001b[31m\u001b[1mAn Error occurs when executing the workflow: Error during single_generate_async: litellm.APIError: AzureException APIError - 'str' object has no attribute 'model_dump'\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1507/3000 [2:06:40<14:52:15, 35.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:56:23.461\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 
50%|█████ | 1508/3000 [2:06:45<11:01:55, 26.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:56:28.521\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1509/3000 [2:06:51<8:21:38, 20.19s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:56:33.699\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1510/3000 [2:06:56<6:29:45, 15.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:56:38.915\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1511/3000 [2:07:01<5:14:27, 12.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:56:44.531\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1512/3000 [2:07:07<4:20:26, 10.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:56:49.970\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1513/3000 [2:07:11<3:35:05, 8.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:56:54.396\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1514/3000 [2:07:15<3:01:49, 7.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:56:58.615\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 50%|█████ | 1515/3000 [2:07:19<2:35:59, 6.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:57:02.495\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1516/3000 [2:07:24<2:22:03, 5.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:57:06.934\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1517/3000 [2:07:28<2:12:33, 5.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:57:11.410\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1518/3000 [2:07:33<2:04:27, 5.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:57:15.691\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1519/3000 [2:07:37<2:01:12, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:57:20.302\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1520/3000 [2:07:42<2:02:26, 4.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:57:25.390\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1521/3000 [2:07:47<2:03:15, 5.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:57:30.475\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1522/3000 [2:07:52<2:03:23, 5.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:57:35.505\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1523/3000 [2:07:57<2:01:14, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:57:40.236\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1524/3000 [2:08:02<1:57:47, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:57:44.704\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1525/3000 [2:08:06<1:56:53, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
1.0}\n", "\u001b[32m2026-01-05 11:57:49.381\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1526/3000 [2:08:12<2:04:18, 5.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:57:55.153\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1527/3000 [2:08:17<2:01:06, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:57:59.789\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1528/3000 [2:08:21<1:56:15, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:58:04.075\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: 
['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1529/3000 [2:08:25<1:53:21, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:58:08.430\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1530/3000 [2:08:30<1:51:03, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:58:12.752\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1531/3000 [2:08:34<1:46:40, 4.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:58:16.698\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1532/3000 [2:08:38<1:50:37, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 
'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:58:21.603\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1533/3000 [2:08:43<1:54:32, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:58:26.670\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1534/3000 [2:08:48<1:56:51, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:58:31.680\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1535/3000 [2:08:53<1:56:35, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:58:36.437\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1536/3000 [2:08:58<1:56:16, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:58:41.180\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████ | 1537/3000 [2:09:03<2:00:25, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:58:46.523\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████▏ | 1538/3000 [2:09:08<1:57:55, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:58:51.130\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████▏ | 1539/3000 [2:09:12<1:52:41, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:58:55.264\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████▏ | 1540/3000 [2:09:17<1:52:01, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:58:59.811\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████▏ | 1541/3000 [2:09:21<1:53:22, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:59:04.611\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████▏ | 1542/3000 [2:09:26<1:55:54, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:59:09.631\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████▏ | 1543/3000 [2:09:32<1:58:33, 4.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:59:14.776\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 51%|█████▏ | 1544/3000 [2:09:36<1:54:42, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:59:19.141\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1545/3000 [2:09:41<1:57:41, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:59:24.289\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1546/3000 [2:09:45<1:53:44, 4.69s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:59:28.611\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1547/3000 [2:09:51<2:00:22, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:59:34.228\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1548/3000 [2:09:56<1:58:41, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:59:38.979\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1549/3000 [2:10:01<1:57:47, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:59:43.771\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1550/3000 [2:10:06<2:02:21, 5.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 11:59:49.282\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1551/3000 [2:10:12<2:08:15, 5.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 11:59:55.171\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1552/3000 [2:10:17<2:06:10, 5.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:00:00.206\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1553/3000 [2:10:22<2:02:29, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:00:04.937\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1554/3000 [2:10:27<2:03:48, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:00:10.209\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1555/3000 [2:10:32<2:05:57, 5.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:00:15.657\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1556/3000 [2:10:37<2:02:56, 5.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:00:20.482\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1557/3000 [2:10:42<2:02:01, 5.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:00:25.475\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1558/3000 [2:10:47<1:59:42, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:00:30.238\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1559/3000 [2:10:52<1:56:00, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:00:34.718\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1560/3000 [2:10:57<1:58:23, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:00:39.891\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1561/3000 [2:11:02<2:01:49, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:00:45.311\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1562/3000 [2:11:07<2:03:39, 5.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:00:50.657\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1563/3000 [2:11:12<1:59:13, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
1.0}\n", "\u001b[32m2026-01-05 12:00:55.213\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1564/3000 [2:11:17<1:57:04, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:00:59.903\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1565/3000 [2:11:23<2:06:23, 5.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:01:06.104\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1566/3000 [2:11:29<2:09:53, 5.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:01:11.889\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: 
['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1567/3000 [2:11:35<2:13:31, 5.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:01:17.845\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1568/3000 [2:11:39<2:03:23, 5.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:01:22.032\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1569/3000 [2:11:44<2:01:31, 5.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:01:26.955\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1570/3000 [2:11:49<2:02:16, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 
0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:01:32.167\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1571/3000 [2:11:53<1:56:35, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:01:36.514\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1572/3000 [2:11:59<1:59:33, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:01:41.837\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1573/3000 [2:12:04<2:03:23, 5.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:01:47.410\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow 
contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▏ | 1574/3000 [2:12:09<1:58:56, 5.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:01:51.984\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 52%|█████▎ | 1575/3000 [2:12:13<1:55:56, 4.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:01:56.581\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1576/3000 [2:12:18<1:53:41, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:02:01.157\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1577/3000 [2:12:23<1:55:45, 4.88s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:02:06.248\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1578/3000 [2:12:28<1:54:10, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:02:10.918\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1579/3000 [2:12:34<2:04:26, 5.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:02:17.192\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1580/3000 [2:12:39<2:00:55, 5.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:02:21.963\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1581/3000 [2:12:44<2:02:03, 5.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:02:27.244\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1582/3000 [2:12:50<2:04:08, 5.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:02:32.711\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1583/3000 [2:12:54<2:00:59, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:02:37.531\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1584/3000 [2:13:00<2:01:54, 5.17s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:02:42.797\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1585/3000 [2:13:05<2:03:05, 5.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:02:48.142\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1586/3000 [2:13:10<2:03:44, 5.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:02:53.466\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1587/3000 [2:13:15<1:58:33, 5.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:02:57.995\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1588/3000 [2:13:20<1:58:34, 5.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:03:03.044\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1589/3000 [2:13:25<2:02:17, 5.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:03:08.621\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1590/3000 [2:13:31<2:03:24, 5.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:03:13.993\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1591/3000 [2:13:36<2:02:14, 5.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:03:19.090\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1592/3000 [2:13:41<2:04:39, 5.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:03:24.652\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1593/3000 [2:13:47<2:08:39, 5.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:03:30.545\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1594/3000 [2:13:54<2:16:31, 5.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:03:37.164\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1595/3000 [2:13:59<2:13:25, 5.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:03:42.562\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1596/3000 [2:14:05<2:11:11, 5.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:03:47.955\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1597/3000 [2:14:10<2:05:13, 5.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:03:52.724\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1598/3000 [2:14:15<2:05:35, 5.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:03:58.146\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1599/3000 [2:14:20<2:05:22, 5.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:04:03.501\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1600/3000 [2:14:25<2:00:33, 5.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:04:08.195\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1601/3000 [2:14:31<2:04:35, 5.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
0.0}\n", "\u001b[32m2026-01-05 12:04:13.950\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1602/3000 [2:14:36<2:04:33, 5.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:04:19.301\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1603/3000 [2:14:41<2:00:04, 5.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:04:24.018\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 53%|█████▎ | 1604/3000 [2:14:47<2:03:59, 5.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:04:29.749\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: 
['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▎ | 1605/3000 [2:14:52<2:06:12, 5.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:04:35.409\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▎ | 1606/3000 [2:14:58<2:05:29, 5.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:04:40.748\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▎ | 1607/3000 [2:15:03<2:06:28, 5.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:04:46.303\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▎ | 1608/3000 [2:15:09<2:12:13, 5.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 
0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:04:52.589\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▎ | 1609/3000 [2:15:15<2:09:15, 5.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:04:57.876\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▎ | 1610/3000 [2:15:20<2:05:57, 5.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:05:02.990\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▎ | 1611/3000 [2:15:25<2:04:39, 5.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:05:08.254\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow 
contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▎ | 1612/3000 [2:15:30<2:03:23, 5.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:05:13.469\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1613/3000 [2:15:36<2:03:15, 5.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:05:18.795\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1614/3000 [2:15:41<2:03:18, 5.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:05:24.148\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1615/3000 [2:15:47<2:04:53, 5.41s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:05:29.728\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1616/3000 [2:15:52<2:02:28, 5.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:05:34.802\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1617/3000 [2:15:58<2:07:29, 5.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:05:40.849\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1618/3000 [2:16:03<2:08:42, 5.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:05:46.570\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1619/3000 [2:16:09<2:05:20, 5.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:05:51.685\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1620/3000 [2:16:14<2:07:43, 5.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:05:57.488\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1621/3000 [2:16:19<2:01:52, 5.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:06:02.207\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1622/3000 [2:16:24<1:59:30, 5.20s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:06:07.180\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1623/3000 [2:16:30<2:06:56, 5.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:06:13.476\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1624/3000 [2:16:35<2:02:21, 5.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:06:18.354\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1625/3000 [2:16:40<1:58:05, 5.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:06:23.081\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1626/3000 [2:16:45<1:59:52, 5.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:06:28.507\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1627/3000 [2:16:52<2:07:30, 5.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:06:34.865\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1628/3000 [2:16:57<2:03:40, 5.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:06:39.893\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1629/3000 [2:17:02<2:05:33, 5.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:06:45.588\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1630/3000 [2:17:08<2:05:00, 5.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:06:51.016\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1631/3000 [2:17:13<2:04:26, 5.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:06:56.421\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1632/3000 [2:17:18<1:58:52, 5.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:07:01.076\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1633/3000 [2:17:23<1:58:29, 5.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:07:06.245\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 54%|█████▍ | 1634/3000 [2:17:28<1:58:17, 5.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:07:11.430\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1635/3000 [2:17:33<1:54:10, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:07:16.036\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1636/3000 [2:17:39<2:02:32, 5.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:07:22.294\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1637/3000 [2:17:44<1:59:56, 5.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:07:27.315\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1638/3000 [2:17:49<1:59:19, 5.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:07:32.519\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1639/3000 [2:17:54<1:57:52, 5.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
0.0}\n", "\u001b[32m2026-01-05 12:07:37.573\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1640/3000 [2:18:00<1:58:05, 5.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:07:42.815\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1641/3000 [2:18:06<2:04:59, 5.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:07:49.055\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1642/3000 [2:18:11<1:59:05, 5.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:07:53.717\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: 
['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1643/3000 [2:18:15<1:56:09, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:07:58.560\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1644/3000 [2:18:20<1:55:21, 5.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:08:03.591\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1645/3000 [2:18:26<1:56:23, 5.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:08:08.859\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1646/3000 [2:18:32<2:02:32, 5.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 
0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:08:14.934\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1647/3000 [2:18:37<1:59:15, 5.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:08:19.893\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1648/3000 [2:18:42<2:02:29, 5.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:08:25.673\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▍ | 1649/3000 [2:18:49<2:08:10, 5.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:08:31.965\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow 
contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1650/3000 [2:18:53<2:00:30, 5.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:08:36.534\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1651/3000 [2:18:58<1:54:16, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:08:40.980\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1652/3000 [2:19:03<1:54:18, 5.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:08:46.081\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1653/3000 [2:19:08<1:51:13, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:08:50.722\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1654/3000 [2:19:13<1:51:56, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:08:55.796\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1655/3000 [2:19:18<1:55:04, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:09:01.264\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1656/3000 [2:19:23<1:53:47, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:09:06.219\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1657/3000 [2:19:28<1:52:55, 5.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:09:11.183\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1658/3000 [2:19:33<1:54:34, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:09:16.487\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1659/3000 [2:19:39<1:57:03, 5.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:09:21.992\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1660/3000 [2:19:44<1:57:08, 5.25s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:09:27.255\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1661/3000 [2:19:49<1:57:01, 5.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:09:32.496\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1662/3000 [2:19:54<1:54:11, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:09:37.329\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1663/3000 [2:19:59<1:54:45, 5.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:09:42.548\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 55%|█████▌ | 1664/3000 [2:20:05<1:59:02, 5.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:09:48.352\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1665/3000 [2:20:11<2:00:54, 5.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:09:53.991\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1666/3000 [2:20:16<2:02:05, 5.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:09:59.616\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1667/3000 [2:20:22<2:05:07, 5.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:10:05.576\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1668/3000 [2:20:30<2:17:50, 6.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:10:13.132\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1669/3000 [2:20:36<2:15:37, 6.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:10:19.023\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1670/3000 [2:20:43<2:19:24, 6.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:10:25.721\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1671/3000 [2:20:47<2:08:50, 5.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:10:30.436\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1672/3000 [2:20:52<2:02:51, 5.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:10:35.366\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1673/3000 [2:20:57<2:01:04, 5.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:10:40.662\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1674/3000 [2:21:03<2:01:18, 5.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:10:46.187\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1675/3000 [2:21:08<2:00:02, 5.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:10:51.498\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1676/3000 [2:21:14<1:58:47, 5.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:10:56.759\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1677/3000 [2:21:19<2:00:18, 5.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
1.0}\n", "\u001b[32m2026-01-05 12:11:02.385\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1678/3000 [2:21:25<2:00:47, 5.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:11:07.928\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1679/3000 [2:21:30<1:58:20, 5.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:11:13.053\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1680/3000 [2:21:35<1:54:28, 5.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:11:17.855\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: 
['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1681/3000 [2:21:42<2:09:31, 5.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:11:25.354\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1682/3000 [2:21:48<2:11:38, 5.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:11:31.583\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1683/3000 [2:21:54<2:09:34, 5.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:11:37.277\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1684/3000 [2:21:59<2:05:05, 5.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 
0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:11:42.513\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1685/3000 [2:22:05<2:07:01, 5.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:11:48.525\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1686/3000 [2:22:11<2:05:54, 5.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:11:54.165\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▌ | 1687/3000 [2:22:16<1:58:03, 5.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:11:58.734\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow 
contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▋ | 1688/3000 [2:22:21<1:59:39, 5.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:12:04.387\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▋ | 1689/3000 [2:22:27<2:00:30, 5.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:12:10.002\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▋ | 1690/3000 [2:22:32<1:56:30, 5.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:12:14.922\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▋ | 1691/3000 [2:22:37<1:55:33, 5.30s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:12:20.126\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▋ | 1692/3000 [2:22:42<1:53:22, 5.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:12:25.101\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▋ | 1693/3000 [2:22:47<1:53:40, 5.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:12:30.362\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▋ | 1694/3000 [2:22:52<1:51:25, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:12:35.248\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 56%|█████▋ | 1695/3000 [2:22:57<1:50:49, 5.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:12:40.290\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1696/3000 [2:23:02<1:49:06, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:12:45.134\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1697/3000 [2:23:08<1:53:20, 5.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:12:50.818\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1698/3000 [2:23:13<1:53:11, 5.22s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:12:56.027\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1699/3000 [2:23:18<1:53:35, 5.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:13:01.317\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1700/3000 [2:23:24<1:56:32, 5.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:13:07.023\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1701/3000 [2:23:29<1:55:54, 5.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:13:12.319\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1702/3000 [2:23:34<1:51:28, 5.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:13:17.003\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1703/3000 [2:23:39<1:50:32, 5.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:13:22.025\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1704/3000 [2:23:44<1:48:17, 5.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:13:26.805\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1705/3000 [2:23:49<1:47:58, 5.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:13:31.782\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1706/3000 [2:23:54<1:48:53, 5.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:13:36.940\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1707/3000 [2:23:59<1:50:15, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:13:42.213\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1708/3000 [2:24:05<1:57:56, 5.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:13:48.532\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1709/3000 [2:24:10<1:54:37, 5.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:13:53.508\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1710/3000 [2:24:16<1:54:17, 5.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:13:58.798\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1711/3000 [2:24:20<1:48:15, 5.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:14:03.191\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1712/3000 [2:24:25<1:50:26, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:14:08.583\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1713/3000 [2:24:30<1:46:15, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:14:13.090\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1714/3000 [2:24:35<1:45:55, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:14:18.004\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1715/3000 [2:24:40<1:46:20, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
1.0}\n", "\u001b[32m2026-01-05 12:14:23.026\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1716/3000 [2:24:45<1:46:08, 4.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:14:27.972\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1717/3000 [2:24:50<1:47:43, 5.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:14:33.192\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1718/3000 [2:24:55<1:44:12, 4.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:14:37.694\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: 
['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1719/3000 [2:24:59<1:43:48, 4.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:14:42.522\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1720/3000 [2:25:04<1:44:23, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:14:47.487\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1721/3000 [2:25:10<1:49:41, 5.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:14:53.223\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1722/3000 [2:25:15<1:45:49, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 
0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:14:57.776\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1723/3000 [2:25:20<1:46:08, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:15:02.808\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▋ | 1724/3000 [2:25:25<1:49:53, 5.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:15:08.396\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 57%|█████▊ | 1725/3000 [2:25:30<1:49:36, 5.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:15:13.533\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow 
contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1726/3000 [2:25:35<1:48:43, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:15:18.566\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1727/3000 [2:25:41<1:51:34, 5.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:15:24.146\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1728/3000 [2:25:46<1:51:19, 5.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:15:29.379\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1729/3000 [2:25:51<1:48:36, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:15:34.216\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1730/3000 [2:25:56<1:45:15, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:15:38.831\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1731/3000 [2:26:01<1:48:33, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:15:44.336\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1732/3000 [2:26:06<1:48:14, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:15:49.434\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1733/3000 [2:26:12<1:49:14, 5.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:15:54.725\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1734/3000 [2:26:17<1:50:34, 5.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:16:00.123\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1735/3000 [2:26:22<1:51:27, 5.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:16:05.518\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1736/3000 [2:26:29<1:58:07, 5.61s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:16:11.873\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1737/3000 [2:26:34<1:55:41, 5.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:16:17.109\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1738/3000 [2:26:39<1:51:11, 5.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:16:21.906\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1739/3000 [2:26:43<1:47:40, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:16:26.648\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1740/3000 [2:26:49<1:48:44, 5.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:16:31.955\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1741/3000 [2:26:54<1:50:38, 5.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:16:37.448\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1742/3000 [2:26:59<1:48:30, 5.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:16:42.395\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1743/3000 [2:27:05<1:53:22, 5.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:16:48.361\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1744/3000 [2:27:10<1:47:39, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:16:52.876\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1745/3000 [2:27:15<1:47:19, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:16:57.979\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1746/3000 [2:27:19<1:44:23, 5.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:17:02.657\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1747/3000 [2:27:24<1:43:52, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:17:07.581\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1748/3000 [2:27:30<1:47:15, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:17:13.110\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1749/3000 [2:27:35<1:48:34, 5.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:17:18.473\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1750/3000 [2:27:41<1:51:27, 5.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:17:24.157\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1751/3000 [2:27:46<1:49:43, 5.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:17:29.243\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1752/3000 [2:27:51<1:44:34, 5.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:17:33.703\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1753/3000 [2:27:55<1:43:02, 4.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
1.0}\n", "\u001b[32m2026-01-05 12:17:38.498\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1754/3000 [2:28:00<1:43:01, 4.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:17:43.466\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 58%|█████▊ | 1755/3000 [2:28:05<1:40:02, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:17:47.960\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▊ | 1756/3000 [2:28:10<1:41:27, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:17:53.023\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: 
['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▊ | 1757/3000 [2:28:15<1:40:57, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:17:57.849\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▊ | 1758/3000 [2:28:20<1:40:56, 4.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:18:02.734\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▊ | 1759/3000 [2:28:25<1:41:54, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:18:07.778\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▊ | 1760/3000 [2:28:30<1:43:40, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 
0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:18:13.005\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▊ | 1761/3000 [2:28:35<1:42:29, 4.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:18:17.843\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▊ | 1762/3000 [2:28:40<1:41:52, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:18:22.721\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1763/3000 [2:28:44<1:39:05, 4.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:18:27.221\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow 
contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1764/3000 [2:28:49<1:40:43, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:18:32.306\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1765/3000 [2:28:54<1:39:32, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:18:37.015\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1766/3000 [2:28:59<1:39:01, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:18:41.783\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1767/3000 [2:29:04<1:39:35, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:18:46.701\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1768/3000 [2:29:09<1:41:04, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:18:51.802\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1769/3000 [2:29:14<1:43:51, 5.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:18:57.189\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1770/3000 [2:29:19<1:41:29, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:19:01.880\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1771/3000 [2:29:23<1:40:08, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:19:06.625\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1772/3000 [2:29:29<1:41:38, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:19:11.772\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1773/3000 [2:29:35<1:47:52, 5.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:19:17.766\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1774/3000 [2:29:39<1:45:15, 5.15s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:19:22.630\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1775/3000 [2:29:45<1:46:26, 5.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:19:27.987\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1776/3000 [2:29:49<1:42:56, 5.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:19:32.644\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1777/3000 [2:29:55<1:43:19, 5.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:19:37.767\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1778/3000 [2:29:59<1:40:16, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:19:42.351\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1779/3000 [2:30:05<1:44:44, 5.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:19:48.019\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1780/3000 [2:30:09<1:41:10, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:19:52.595\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1781/3000 [2:30:14<1:37:25, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:19:56.970\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1782/3000 [2:30:19<1:38:22, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:20:01.935\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1783/3000 [2:30:24<1:40:38, 4.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:20:07.166\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 59%|█████▉ | 1784/3000 [2:30:29<1:39:55, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:20:12.025\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1785/3000 [2:30:34<1:42:16, 5.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:20:17.355\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1786/3000 [2:30:39<1:42:00, 5.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:20:22.376\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1787/3000 [2:30:44<1:41:30, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:20:27.347\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1788/3000 [2:30:49<1:41:36, 5.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:20:32.399\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1789/3000 [2:30:55<1:43:19, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:20:37.728\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1790/3000 [2:31:00<1:45:59, 5.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:20:43.301\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1791/3000 [2:31:06<1:49:25, 5.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
0.0}\n", "\u001b[32m2026-01-05 12:20:49.140\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1792/3000 [2:31:11<1:46:42, 5.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:20:54.136\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1793/3000 [2:31:16<1:43:31, 5.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:20:58.924\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1794/3000 [2:31:21<1:42:58, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:21:03.993\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: 
['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1795/3000 [2:31:28<1:54:18, 5.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:21:11.012\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1796/3000 [2:31:35<2:01:46, 6.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:21:17.959\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1797/3000 [2:31:40<1:58:12, 5.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:21:23.453\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1798/3000 [2:31:45<1:51:28, 5.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 
0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:21:28.243\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|█████▉ | 1799/3000 [2:31:50<1:45:55, 5.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:21:32.899\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1800/3000 [2:31:55<1:46:57, 5.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:21:38.379\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1801/3000 [2:32:01<1:49:24, 5.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:21:44.149\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow 
contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1802/3000 [2:32:07<1:54:00, 5.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:21:50.408\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1803/3000 [2:32:12<1:47:55, 5.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:21:55.118\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1804/3000 [2:32:18<1:49:14, 5.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:22:00.762\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1805/3000 [2:32:23<1:46:58, 5.37s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:22:05.879\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1806/3000 [2:32:28<1:44:26, 5.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:22:10.840\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1807/3000 [2:32:33<1:45:24, 5.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:22:16.264\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1808/3000 [2:32:39<1:46:23, 5.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:22:21.747\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1809/3000 [2:32:44<1:45:35, 5.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:22:26.983\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1810/3000 [2:32:49<1:42:25, 5.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:22:31.785\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1811/3000 [2:32:54<1:43:36, 5.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:22:37.164\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1812/3000 [2:32:59<1:43:38, 5.23s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:22:42.412\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1813/3000 [2:33:05<1:44:43, 5.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:22:47.843\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1814/3000 [2:33:09<1:40:10, 5.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:22:52.385\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 60%|██████ | 1815/3000 [2:33:14<1:38:11, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:22:57.131\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1816/3000 [2:33:20<1:45:37, 5.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:23:03.374\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1817/3000 [2:33:26<1:47:46, 5.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:23:09.104\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1818/3000 [2:33:31<1:46:02, 5.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:23:14.294\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1819/3000 [2:33:37<1:46:10, 5.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:23:19.713\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1820/3000 [2:33:41<1:40:57, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:23:24.238\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1821/3000 [2:33:46<1:40:19, 5.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:23:29.281\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1822/3000 [2:33:51<1:39:31, 5.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:23:34.264\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1823/3000 [2:33:57<1:41:55, 5.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:23:39.756\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1824/3000 [2:34:01<1:38:34, 5.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:23:44.397\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1825/3000 [2:34:06<1:38:17, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:23:49.393\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1826/3000 [2:34:11<1:37:12, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:23:54.242\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1827/3000 [2:34:17<1:42:11, 5.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:24:00.072\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1828/3000 [2:34:22<1:38:41, 5.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:24:04.717\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1829/3000 [2:34:26<1:36:42, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
1.0}\n", "\u001b[32m2026-01-05 12:24:09.445\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1830/3000 [2:34:31<1:33:17, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:24:13.832\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1831/3000 [2:34:37<1:40:07, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:24:19.799\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1832/3000 [2:34:42<1:39:27, 5.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:24:24.839\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: 
['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1833/3000 [2:34:46<1:36:23, 4.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:24:29.436\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1834/3000 [2:34:51<1:36:21, 4.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:24:34.401\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1835/3000 [2:34:56<1:36:31, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:24:39.402\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1836/3000 [2:35:02<1:39:34, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 
0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:24:44.912\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████ | 1837/3000 [2:35:07<1:40:29, 5.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:24:50.215\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████▏ | 1838/3000 [2:35:12<1:39:45, 5.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:24:55.288\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████▏ | 1839/3000 [2:35:17<1:39:37, 5.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:25:00.433\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow 
contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████▏ | 1840/3000 [2:35:23<1:41:36, 5.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:25:05.936\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████▏ | 1841/3000 [2:35:28<1:39:51, 5.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:25:10.907\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████▏ | 1842/3000 [2:35:33<1:39:15, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:25:15.987\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████▏ | 1843/3000 [2:35:38<1:38:34, 5.11s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:25:21.028\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 61%|██████▏ | 1844/3000 [2:35:44<1:43:51, 5.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:25:27.068\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1845/3000 [2:35:49<1:43:37, 5.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:25:32.434\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1846/3000 [2:35:54<1:40:18, 5.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:25:37.258\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m 
- \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1847/3000 [2:35:59<1:39:44, 5.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:25:42.389\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1848/3000 [2:36:04<1:35:49, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:25:46.916\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1849/3000 [2:36:09<1:37:34, 5.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:25:52.224\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1850/3000 [2:36:15<1:40:53, 5.26s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:25:57.904\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1851/3000 [2:36:20<1:42:36, 5.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:26:03.479\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1852/3000 [2:36:25<1:39:29, 5.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:26:08.311\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1853/3000 [2:36:30<1:35:06, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:26:12.760\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1854/3000 [2:36:35<1:37:49, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:26:18.224\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1855/3000 [2:36:40<1:34:20, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:26:22.752\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1856/3000 [2:36:44<1:33:20, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:26:27.536\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1857/3000 [2:36:49<1:30:24, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:26:31.932\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1858/3000 [2:36:54<1:32:38, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:26:37.083\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1859/3000 [2:36:59<1:31:08, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:26:41.703\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1860/3000 [2:37:04<1:37:38, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:26:47.650\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1861/3000 [2:37:09<1:35:59, 5.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:26:52.514\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1862/3000 [2:37:14<1:34:07, 4.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:26:57.258\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1863/3000 [2:37:19<1:32:00, 4.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:27:01.862\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1864/3000 [2:37:24<1:32:17, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:27:06.781\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1865/3000 [2:37:28<1:29:31, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:27:11.182\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1866/3000 [2:37:33<1:29:31, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:27:15.928\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1867/3000 [2:37:38<1:30:02, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 1.0}\n", "\u001b[32m2026-01-05 12:27:20.770\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1868/3000 [2:37:42<1:29:20, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:27:25.428\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1869/3000 [2:37:46<1:26:28, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:27:29.670\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1870/3000 [2:37:52<1:29:00, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:27:34.720\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1871/3000 [2:37:56<1:27:35, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:27:39.209\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1872/3000 [2:38:01<1:28:39, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:27:44.067\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1873/3000 [2:38:06<1:30:13, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:27:49.075\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▏ | 1874/3000 [2:38:11<1:30:09, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": 
[ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:27:53.880\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 62%|██████▎ | 1875/3000 [2:38:15<1:29:36, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:27:58.601\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1876/3000 [2:38:22<1:38:08, 5.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:28:04.912\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1877/3000 [2:38:27<1:36:23, 5.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:28:09.857\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1878/3000 [2:38:31<1:34:00, 5.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:28:14.595\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1879/3000 [2:38:37<1:38:06, 5.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:28:20.370\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1880/3000 [2:38:42<1:36:33, 5.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:28:25.360\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1881/3000 [2:38:48<1:38:38, 5.29s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:28:30.920\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1882/3000 [2:38:53<1:39:03, 5.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:28:36.300\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1883/3000 [2:38:58<1:35:20, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:28:40.967\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1884/3000 [2:39:03<1:34:33, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:28:45.963\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1885/3000 [2:39:08<1:32:46, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:28:50.743\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1886/3000 [2:39:12<1:27:56, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:28:54.883\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1887/3000 [2:39:16<1:25:27, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:28:59.188\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1888/3000 [2:39:20<1:23:16, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:29:03.416\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1889/3000 [2:39:25<1:24:44, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:29:08.186\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1890/3000 [2:39:30<1:24:29, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:29:12.732\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1891/3000 [2:39:35<1:29:05, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:29:18.141\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1892/3000 [2:39:41<1:33:32, 5.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:29:23.778\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1893/3000 [2:39:46<1:36:36, 5.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:29:29.415\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1894/3000 [2:39:51<1:34:05, 5.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:29:34.210\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1895/3000 [2:39:56<1:32:48, 5.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:29:39.100\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1896/3000 [2:40:01<1:30:44, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:29:43.777\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1897/3000 [2:40:06<1:31:02, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:29:48.779\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1898/3000 [2:40:11<1:32:10, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 0.0}\n", "\u001b[32m2026-01-05 12:29:53.953\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1899/3000 [2:40:15<1:27:39, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:29:58.166\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1900/3000 [2:40:22<1:39:07, 5.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:30:05.043\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1901/3000 [2:40:27<1:38:10, 5.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:30:10.294\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1902/3000 [2:40:32<1:35:21, 5.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:30:15.157\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1903/3000 [2:40:37<1:35:00, 5.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:30:20.319\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 63%|██████▎ | 1904/3000 [2:40:42<1:30:27, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:30:24.703\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▎ | 1905/3000 [2:40:46<1:29:13, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": 
[ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:30:29.443\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▎ | 1906/3000 [2:40:52<1:34:38, 5.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:30:35.336\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▎ | 1907/3000 [2:40:57<1:31:03, 5.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:30:39.889\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▎ | 1908/3000 [2:41:01<1:29:38, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:30:44.642\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▎ | 1909/3000 [2:41:08<1:36:17, 5.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:30:50.801\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▎ | 1910/3000 [2:41:14<1:41:28, 5.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:30:57.064\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▎ | 1911/3000 [2:41:19<1:38:40, 5.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:31:02.153\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▎ | 1912/3000 [2:41:25<1:39:07, 5.47s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:31:07.688\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1913/3000 [2:41:30<1:37:42, 5.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:31:12.910\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1914/3000 [2:41:35<1:36:28, 5.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:31:18.093\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1915/3000 [2:41:40<1:35:19, 5.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:31:23.228\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1916/3000 [2:41:46<1:37:35, 5.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:31:28.935\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1917/3000 [2:41:51<1:38:58, 5.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:31:34.607\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1918/3000 [2:41:56<1:36:31, 5.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:31:39.656\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1919/3000 [2:42:03<1:40:07, 5.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:31:45.692\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1920/3000 [2:42:07<1:33:53, 5.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:31:50.111\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1921/3000 [2:42:12<1:31:32, 5.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:31:54.909\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1922/3000 [2:42:16<1:28:11, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:31:59.393\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1923/3000 [2:42:22<1:31:19, 5.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:32:04.899\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1924/3000 [2:42:27<1:29:51, 5.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:32:09.730\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1925/3000 [2:42:32<1:33:10, 5.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:32:15.373\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1926/3000 [2:42:37<1:31:14, 5.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:32:20.229\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1927/3000 [2:42:46<1:53:29, 6.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:32:29.489\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1928/3000 [2:42:52<1:48:47, 6.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:32:34.977\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1929/3000 [2:42:58<1:47:00, 5.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 1.0}\n", "\u001b[32m2026-01-05 12:32:40.754\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1930/3000 [2:43:03<1:42:55, 5.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:32:46.005\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1931/3000 [2:43:08<1:38:45, 5.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:32:51.014\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1932/3000 [2:43:13<1:34:05, 5.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:32:55.701\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1933/3000 [2:43:18<1:34:07, 5.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:33:01.009\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1934/3000 [2:43:24<1:36:14, 5.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:33:06.717\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 64%|██████▍ | 1935/3000 [2:43:28<1:31:54, 5.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:33:11.336\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1936/3000 [2:43:33<1:31:46, 5.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": 
[ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:33:16.507\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1937/3000 [2:43:38<1:31:05, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:33:21.568\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1938/3000 [2:43:43<1:29:48, 5.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:33:26.486\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1939/3000 [2:43:49<1:30:54, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:33:31.781\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1940/3000 [2:43:54<1:29:46, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:33:36.725\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1941/3000 [2:43:58<1:26:03, 4.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:33:41.119\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1942/3000 [2:44:03<1:25:01, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:33:45.817\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1943/3000 [2:44:07<1:23:30, 4.74s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:33:50.365\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1944/3000 [2:44:12<1:25:23, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:33:55.477\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1945/3000 [2:44:17<1:24:40, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:34:00.210\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1946/3000 [2:44:22<1:26:15, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:34:05.340\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1947/3000 [2:44:27<1:25:27, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:34:10.113\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1948/3000 [2:44:33<1:32:37, 5.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:34:16.361\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▍ | 1949/3000 [2:44:38<1:29:46, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:34:21.117\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1950/3000 [2:44:42<1:25:12, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:34:25.390\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1951/3000 [2:44:48<1:28:01, 5.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:34:30.811\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1952/3000 [2:44:53<1:28:53, 5.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:34:36.026\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1953/3000 [2:44:58<1:29:21, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:34:41.220\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1954/3000 [2:45:03<1:28:49, 5.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:34:46.256\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1955/3000 [2:45:08<1:28:12, 5.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:34:51.249\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1956/3000 [2:45:13<1:25:06, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:34:55.736\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1957/3000 [2:45:18<1:25:57, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:35:00.807\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1958/3000 [2:45:22<1:25:19, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:35:05.645\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1959/3000 [2:45:27<1:23:48, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:35:10.283\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1960/3000 [2:45:33<1:27:14, 5.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 0.0}\n", "\u001b[32m2026-01-05 12:35:15.789\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1961/3000 [2:45:38<1:29:22, 5.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:35:21.248\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1962/3000 [2:45:45<1:37:14, 5.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:35:27.943\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1963/3000 [2:45:50<1:33:07, 5.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:35:32.786\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 65%|██████▌ | 1964/3000 [2:45:54<1:29:47, 5.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:35:37.548\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1965/3000 [2:46:01<1:38:20, 5.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:35:44.419\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1966/3000 [2:46:06<1:34:37, 5.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:35:49.419\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1967/3000 [2:46:12<1:35:59, 5.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": 
[ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:35:55.191\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1968/3000 [2:46:18<1:36:08, 5.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:36:00.814\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1969/3000 [2:46:23<1:35:42, 5.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:36:06.338\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1970/3000 [2:46:29<1:35:25, 5.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:36:11.871\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1971/3000 [2:46:34<1:33:11, 5.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:36:17.013\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1972/3000 [2:46:39<1:32:31, 5.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:36:22.334\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1973/3000 [2:46:45<1:34:45, 5.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:36:28.189\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1974/3000 [2:46:51<1:38:41, 5.77s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:36:34.507\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1975/3000 [2:46:56<1:31:53, 5.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:36:38.973\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1976/3000 [2:47:01<1:33:13, 5.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:36:44.629\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1977/3000 [2:47:06<1:29:16, 5.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:36:49.337\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1978/3000 [2:47:12<1:31:31, 5.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:36:55.032\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1979/3000 [2:47:17<1:30:23, 5.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:37:00.199\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1980/3000 [2:47:22<1:28:15, 5.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:37:05.111\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1981/3000 [2:47:26<1:24:45, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:37:09.632\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1982/3000 [2:47:31<1:23:47, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:37:14.448\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1983/3000 [2:47:37<1:26:19, 5.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:37:19.903\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1984/3000 [2:47:42<1:26:24, 5.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:37:25.027\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1985/3000 [2:47:47<1:25:11, 5.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:37:29.909\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1986/3000 [2:47:52<1:27:02, 5.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:37:35.325\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▌ | 1987/3000 [2:47:57<1:24:05, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:37:39.912\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▋ | 1988/3000 [2:48:01<1:22:17, 4.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:37:44.553\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▋ | 1989/3000 [2:48:06<1:23:09, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:37:49.618\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▋ | 1990/3000 [2:48:12<1:27:01, 5.17s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:37:55.335\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▋ | 1991/3000 [2:48:17<1:26:13, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 1.0}\n", "\u001b[32m2026-01-05 12:38:00.364\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▋ | 1992/3000 [2:48:22<1:26:22, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:38:05.537\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▋ | 1993/3000 [2:48:28<1:29:00, 5.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:38:11.219\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▋ | 1994/3000 [2:48:33<1:26:05, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:38:15.962\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 66%|██████▋ | 1995/3000 [2:48:38<1:25:53, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:38:21.071\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1996/3000 [2:48:43<1:26:21, 5.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:38:26.311\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1997/3000 [2:48:48<1:27:18, 5.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:38:31.677\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1998/3000 [2:48:54<1:29:23, 5.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": 
[ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:38:37.333\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 1999/3000 [2:49:00<1:31:09, 5.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:38:43.058\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 2000/3000 [2:49:04<1:26:41, 5.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:38:47.648\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 2001/3000 [2:49:10<1:26:31, 5.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:38:52.833\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643045.883172353)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643057.542556222)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643067.124149535)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643041.34782037)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643052.095718237)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643062.30022808)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643078.241281606)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643088.966469814)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643099.232135138)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643110.161596826)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643083.959197936)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643093.820703718)])']\n", "connector: \n", "Unclosed connector\n", 
"connections: ['deque([(, 1643105.100458161)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643122.147248596)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643132.745811166)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643143.212922453)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643153.37335635)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643127.775744837)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643137.665348593)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643148.254500889)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643164.537963349)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643174.839432686)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643185.420399117)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643195.700691084)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643179.647234412)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643190.433006869)])']\n", "connector: \n", "Unclosed client session\n", "client_session: 
\n", "Unclosed connector\n", "connections: ['deque([(, 1643169.276404486)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643206.022051134)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643216.362452283)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643226.010441126)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643236.691807132)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643211.678905956)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643221.21006819)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643231.476823793)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643246.471267752)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643256.272501394)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643266.335925522)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643276.618643102)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643251.506966111)])']\n", "connector: \n", 
"Unclosed connector\n", "connections: ['deque([(, 1643261.719140402)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643271.169169262)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643287.61212916)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643297.727077313)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643307.367370872)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643408.647195323)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643292.072320852)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643302.076124596)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643402.495746075)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643419.242581556)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643428.616880102)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643440.532885356)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643450.751317363)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643434.005315141)])']\n", "connector: \n", "Unclosed 
connector\n", "connections: ['deque([(, 1643445.717545884)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643423.956847528)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643460.433664512)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643470.575410775)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643481.457061632)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643494.359596434)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643465.29414606)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643475.630947888)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643486.327449454)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643504.143075485)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643513.91211076)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643523.871223642)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643534.382975509)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client 
session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643509.060935235)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643518.485386941)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643529.407347295)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643543.872031201)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643553.413648905)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643563.169921722)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643573.156058632)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643548.545355134)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643558.363426763)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643568.332295989)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643579.462443632)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643583.5030923)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643593.072276679)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643604.105841312)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: 
['deque([(, 1643613.952005878)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643598.485471673)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643609.14691574)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643588.468521316)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643623.353521057)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643633.038579262)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643644.93387974)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643653.876094353)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643628.199380768)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643638.954209008)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643649.461548393)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643663.990659629)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643674.317022118)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643684.330760825)])']\n", "connector: \n", 
"Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643694.448182533)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643669.755846433)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643679.787492478)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643688.981498333)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643703.741287024)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643712.910318452)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643723.079675317)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643732.858764975)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643708.422278953)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643718.052289511)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643728.16675715)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643737.695862174)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643742.826801967)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1643751.425155355)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643761.462036487)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643771.495546782)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643747.356991336)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643766.004348057)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643755.769691232)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643781.806619672)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643791.385041588)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643802.667090806)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643813.358693743)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643786.847367295)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643796.675342545)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643808.017642458)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643823.393264423)])']\n", "connector: \n", "Unclosed 
client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643833.620915323)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643842.608305007)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643853.23860236)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643828.680413488)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643837.75667238)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643847.780223112)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643863.907563399)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643874.714548285)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643884.567455612)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643895.176374037)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643879.927838989)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643869.143263241)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643890.434700401)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1643905.598427128)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643915.871276179)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643926.358633715)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643936.505503291)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643911.254057363)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643921.101196244)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1643931.110853656)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643947.118185511)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643957.075852263)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643966.602992103)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643976.189109978)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643952.5088101)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643961.831542753)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643971.560178579)])']\n", "connector: \n", "Unclosed 
client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643986.95007947)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643997.597195734)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644007.528483539)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644017.167294092)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643992.296410195)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644002.622009178)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644011.745659122)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644028.184891447)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644039.058881551)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644050.325785262)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644059.912057124)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644033.565626683)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644045.520894872)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 
1644055.230760646)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644070.352149068)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644079.708919162)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644090.295093848)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644100.006547299)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644074.702910972)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644085.434213582)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1644095.009914176)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644109.319739837)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644119.105643005)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644129.311644699)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644139.277796366)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644114.373084567)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644134.091368803)])']\n", "connector: \n", "Unclosed 
client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644123.754364303)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643073.259738098)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643115.755390412)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643157.985765553)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643201.543571708)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643242.047978962)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643282.251330527)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643455.753190165)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643498.955396421)])']\n", 
"connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643538.865928145)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643618.957673312)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643659.80909583)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643698.551017736)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643776.848925305)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643818.106561498)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643857.914464891)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643899.748190508)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643941.6078156)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1643981.459584228)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644023.142665016)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644065.141264073)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1644104.701126329)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644143.407502426)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1643413.926181205)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644149.987558438)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644160.556713394)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644170.39455872)])']\n", "connector: \n", "Unclosed client session\n", 
"client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644174.964987279)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644179.968751445)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644185.23814146)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644190.908867845)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644165.350638013)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644155.269114778)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644195.484929861)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644201.34597816)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644206.27229283)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644211.623972857)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644222.872616412)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644232.19774424)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644236.484726994)])']\n", "connector: \n", "Unclosed connector\n", "connections: 
['deque([(, 1644217.610766931)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644227.301971269)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644241.909801353)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644246.357915473)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644255.941425565)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644260.790397133)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644266.166621311)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644271.727081845)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644276.638064125)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644250.992549691)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644281.143599013)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644286.761154897)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644293.640228041)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644298.486283374)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", 
"Unclosed connector\n", "connections: ['deque([(, 1644308.789384741)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644313.932631051)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644318.363384622)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644303.313028306)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644323.012111945)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644327.879523391)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644332.771763366)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644338.008635244)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644348.643615064)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644359.059560804)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644364.190786278)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644343.779858964)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644354.343939257)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644368.500720626)])']\n", "connector: \n", "Unclosed client 
session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644378.85291526)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644389.224967343)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644399.515188731)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644403.611388211)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644374.293290195)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644384.521302174)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644393.907938029)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644408.390955421)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644417.544497842)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644427.349215417)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644438.40154119)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644443.079048925)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1644412.712582437)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644422.070377029)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644431.863190083)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644447.655676405)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644457.82087764)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644468.314224015)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644478.700013895)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644483.335113813)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644452.970534358)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644463.442402359)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644473.009585237)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644488.289366955)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644498.049135628)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644507.392266273)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644516.483734833)])']\n", "connector: \n", "Unclosed client session\n", 
"client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644521.78012686)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644493.223969647)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644502.700945899)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644512.110683144)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644526.845838134)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644536.719509877)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644546.237097585)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644555.156728723)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644559.771775498)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644531.9810754)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644541.579709947)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1644550.74373576)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644564.648302595)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: 
['deque([(, 1644574.161720098)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644583.610479742)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644595.025384094)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644600.023832833)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644569.645425455)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644578.775496725)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644588.330407638)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644604.550392471)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644614.202627268)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644623.910356879)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644633.497013181)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644639.754851531)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644609.19254985)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644619.332845587)])']\n", "connector: \n", 
"Unclosed connector\n", "connections: ['deque([(, 1644628.790841727)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644645.06472734)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644657.257094784)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644666.421640795)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644675.262321978)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644679.46959488)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644649.732655481)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644661.521660065)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644671.017946555)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644684.108254089)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644693.63315783)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644703.573974277)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1644712.191050797)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1644717.801054458)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644688.819048182)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644698.673809394)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644707.912845197)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644722.101671449)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644732.664152933)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644742.189787394)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644751.183415133)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644755.180237145)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644727.189508336)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644737.169913555)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644746.730877715)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644760.039841659)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644769.160056687)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed 
connector\n", "connections: ['deque([(, 1644778.497674387)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644787.832469412)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644792.779640536)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644773.705848012)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644783.13615478)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644764.922423597)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644797.371660524)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644807.310510795)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644816.534209213)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644827.383134117)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644831.658171953)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644802.296089514)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644812.13419446)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 
1644822.09612191)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644837.656673617)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644846.118167748)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644855.396786202)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644864.259919292)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644869.765575087)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644841.658548373)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644850.962060251)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644859.682981196)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644874.881163646)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644885.196210518)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644894.309183491)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644904.481604688)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed 
connector\n", "connections: ['deque([(, 1644909.263526825)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644880.54403793)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644889.511506902)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644899.22299593)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644913.431893968)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644924.300164139)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644936.127577862)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644946.291252376)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644951.191788812)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644918.995805913)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644929.596645169)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644941.890986169)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644955.776767674)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644966.109298878)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644975.864580715)])']\n", "connector: \n", "Unclosed client session\n", 
"client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1644985.426521527)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644989.690240284)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644960.902831277)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644970.714954774)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1644980.528906753)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644994.458405114)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645004.688528773)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645014.255397145)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645022.583890652)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645027.008477982)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1644999.838335788)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645009.388321088)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645018.25650254)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", 
"connections: ['deque([(, 1645031.217392699)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645040.377838137)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645049.906112971)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645059.119120648)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645064.08093533)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645035.726352302)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645044.689572329)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645054.548609212)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645069.083363989)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645078.478031187)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645087.941783806)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645097.417913024)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645102.463181301)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645073.390645012)])']\n", "connector: 
\n", "Unclosed connector\n", "connections: ['deque([(, 1645093.038097596)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1645082.829296995)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645107.337493388)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645121.643973218)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645131.824736281)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645141.754117073)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645146.121253913)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645117.005659285)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645127.350710176)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645137.212676408)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645150.510589811)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645160.276378407)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645169.598941667)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1645179.891557655)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645185.18634689)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645155.811163754)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645165.12591148)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645174.687425955)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645189.508361656)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645200.037719302)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645209.799565772)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645219.897405836)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645225.109318055)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645194.964809252)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645205.110947858)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645214.524885341)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645229.928890298)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed 
connector\n", "connections: ['deque([(, 1645239.498540069)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645251.067481983)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1645260.829750388)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645265.200749416)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645234.870552859)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645245.82522494)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645255.356627277)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645270.593600812)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645280.421634324)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645291.430563055)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645300.362622266)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645306.056821452)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645275.747422722)])']\n", "connector: \n", "Unclosed 
connector\n", "connections: ['deque([(, 1645286.50887524)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645296.043243703)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645310.91730691)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645321.063849058)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645331.981586577)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645342.768346733)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645348.398102474)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645315.516046532)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645326.02013207)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645337.499092254)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645353.48158621)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645363.172785708)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645372.084352735)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645382.382953931)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", 
"connections: ['deque([(, 1645387.175946224)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645358.217367836)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645367.644478462)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1645377.950297334)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645392.26716259)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645400.554147755)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645410.05046116)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645419.966479073)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645425.34567658)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645396.34119675)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645405.295609417)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645415.343155332)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645429.652503739)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645542.908466963)])']\n", 
"connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645548.047177346)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645553.22546679)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645558.440457123)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645569.496244194)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645573.922282564)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645434.935170141)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645564.05732017)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645586.460518688)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645590.936515905)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645599.828207012)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645604.916184826)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645578.141787146)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645582.021300719)])']\n", "connector: \n", "Unclosed connector\n", "connections: 
['deque([(, 1645595.217721073)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645610.00194849)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645615.031966306)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645619.762307053)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645624.230932189)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645634.67952153)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645643.601230578)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645647.956246086)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645628.908010997)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645639.315810966)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645652.278073775)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645661.129936673)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645671.206734006)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645680.706588997)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", 
"Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645686.049113153)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645656.2241336)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645666.196272904)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645675.963166747)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645690.656829603)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645699.337867695)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645709.157759732)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645718.667307095)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645723.815849886)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645694.790822505)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645704.137378936)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645714.302870828)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645728.137419548)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645738.505082847)])']\n", "connector: \n", "Unclosed client 
session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645748.808055391)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645759.732777546)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645764.462990738)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645733.754819995)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645743.297666041)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1645754.69723586)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645769.73587654)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645780.008752741)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645789.764649196)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645799.417615762)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645775.183693685)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645785.001099537)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645794.244890132)])']\n", "connector: \n", "Unclosed connector\n", 
"connections: ['deque([(, 1645804.837600333)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645810.183591776)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645819.429875115)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645831.415316564)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645841.558771934)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645846.481252719)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645814.739936386)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645825.630821706)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645837.371095945)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645851.693512984)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645861.363107076)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645871.510623274)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645880.683694871)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", 
"client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645885.774914183)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645856.040267294)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645866.936162233)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645876.107284981)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645890.444250545)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645901.489824658)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645912.237950406)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1645922.323441337)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645927.668536809)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645896.718198291)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645906.770033668)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645917.05789331)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645932.992913962)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645942.570639304)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: 
['deque([(, 1645953.519166589)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645964.178139044)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645970.071850805)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645937.521387298)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645948.147918726)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645958.616893228)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645976.690398106)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645987.481105759)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1645997.672165621)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646007.721175567)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646013.476471896)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645982.088576879)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1645992.250121242)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646003.027137809)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", 
"Unclosed connector\n", "connections: ['deque([(, 1646018.82776404)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646029.275435708)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646040.27396055)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646052.115402825)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646057.40278756)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646023.544514022)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646034.935983358)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1646045.829128705)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646062.51661199)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646072.995292206)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646083.674028126)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646094.328813005)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1646100.375855735)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646067.78035639)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646078.321719462)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646089.254236019)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646106.096762268)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646117.014872949)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646126.706374896)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646137.88045704)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646142.607522165)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646111.211702979)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646121.733311591)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646133.00205679)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646148.033305785)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646159.419636575)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646170.542110165)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", 
"connections: ['deque([(, 1646180.602461756)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646154.391296403)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646185.7719461)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646165.114520886)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646175.947464633)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646190.956883304)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646201.820183676)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646212.045016296)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1646222.341820265)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646228.581037572)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646206.841423067)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646217.099986621)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646195.562463069)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646233.243623126)])']\n", 
"connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646243.11791665)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646254.460353346)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646265.199568476)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646271.491194769)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646238.086957871)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646248.385622013)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646259.419129925)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646276.060412683)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646285.607202893)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646295.322300773)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646305.745554806)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646310.709240209)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646280.5057615)])']\n", "connector: 
\n", "Unclosed connector\n", "connections: ['deque([(, 1646290.248606594)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646300.790751955)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646316.013395574)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646326.781913297)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646336.855288655)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646347.87841296)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646353.517572754)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646321.518060802)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646332.022771194)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1646342.074640801)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646359.142320733)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646372.658570306)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646385.247730546)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646394.89231549)])']\n", "connector: \n", "Unclosed 
client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646378.549568342)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646389.962309374)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646400.188265258)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646365.10284885)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646405.713103496)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646416.285420725)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646427.454497601)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646437.381664541)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646444.880094912)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646411.024481592)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646432.579838199)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646421.911326486)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646451.109087229)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1646462.03983046)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646473.691586275)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646483.913954192)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646489.528678697)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646456.803060874)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646478.260940289)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646468.051299904)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646494.448308455)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646504.627779279)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646514.774820225)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1646524.660013955)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646530.344515088)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646499.652125269)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 
1646509.888890887)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646519.81631852)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646535.553198442)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646546.549268259)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646556.529505254)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646566.331699533)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646571.308343778)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646540.84368479)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646551.845039302)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646561.551620292)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646576.4662554)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646588.057997926)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646598.324246988)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646608.109445836)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1646612.616386006)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646581.739572668)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646593.034835742)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646602.717289291)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646617.530835686)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646627.498918358)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646637.220037653)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646647.013752942)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646652.749633475)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646622.55249283)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646632.718163508)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1646642.04843303)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646657.302934204)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646667.922402605)])']\n", "connector: \n", "Unclosed 
client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646678.092667998)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646688.905684435)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646693.742769429)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646662.334692499)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646673.059348511)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646683.672835529)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646698.357264395)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646708.960180283)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646719.649652053)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646731.399809246)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646736.635473977)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646703.862477403)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646725.04427927)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1646714.251225134)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646741.432879315)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646751.481903521)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646761.921499487)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646772.402610147)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646777.505167303)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646746.174738612)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646756.97458243)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646767.887087609)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646782.18304858)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646792.636557147)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646803.683728994)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1646813.229955649)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", 
"Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646818.024675982)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646787.10720172)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646797.999464699)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646808.76916202)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646822.992438119)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646832.549252308)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646842.260510615)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646852.531545581)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646857.369116777)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646827.486907703)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646837.375636322)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646847.304970342)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646862.247405076)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646871.83224995)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1646881.30882443)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646891.328260124)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646896.715164765)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646876.541403907)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646866.747237747)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646886.227542571)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646901.406310243)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646911.298430808)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646922.156113739)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646932.170672528)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646937.293390118)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646906.151664029)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646917.292474916)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1646927.513224158)])']\n", "connector: \n", "Unclosed 
client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646941.87764368)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646952.121052173)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646961.46105138)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646971.551273402)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646976.881297714)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646947.545569963)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646956.49616625)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646966.692998892)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646981.902551733)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646991.925023852)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647002.82764222)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647013.662610357)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647018.450479453)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client 
session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1646986.873798123)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1646997.254115149)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647008.666380711)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647023.519239703)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647037.485692077)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647047.769203305)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647057.90547323)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647063.675752147)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647030.538366594)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647042.979089265)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647052.425417527)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647069.934792549)])']\n", "connector: \n", "Evaluating workflow: 67%|██████▋ | 2002/3000 [2:49:17<1:35:20, 5.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:38:59.814\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 2003/3000 [2:49:21<1:30:40, 5.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:39:04.629\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 2004/3000 [2:49:28<1:36:24, 5.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:39:11.255\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 2005/3000 [2:49:33<1:32:10, 5.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:39:16.233\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 2006/3000 [2:49:38<1:26:48, 5.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:39:20.729\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 2007/3000 [2:49:43<1:26:15, 5.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:39:25.875\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 2008/3000 [2:49:47<1:20:57, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:39:30.036\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 2009/3000 [2:49:52<1:19:58, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:39:34.752\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 2010/3000 [2:49:57<1:22:05, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:39:40.037\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 2011/3000 [2:50:02<1:21:51, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:39:44.984\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 2012/3000 [2:50:07<1:22:21, 5.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:39:50.067\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 2013/3000 [2:50:12<1:23:27, 5.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:39:55.308\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 2014/3000 [2:50:17<1:20:23, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:39:59.777\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 2015/3000 [2:50:21<1:18:42, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:40:04.344\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 2016/3000 [2:50:27<1:23:40, 5.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 0.0}\n", "\u001b[32m2026-01-05 12:40:10.165\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 2017/3000 [2:50:32<1:23:42, 5.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:40:15.289\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 2018/3000 [2:50:36<1:19:33, 4.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:40:19.572\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 2019/3000 [2:50:42<1:24:14, 5.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:40:25.403\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 2020/3000 [2:50:47<1:24:46, 5.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:40:30.682\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 2021/3000 [2:50:52<1:23:33, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:40:35.641\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 2022/3000 [2:50:58<1:25:25, 5.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:40:41.161\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 2023/3000 [2:51:04<1:31:08, 5.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": 
[ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:40:47.592\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 67%|██████▋ | 2024/3000 [2:51:10<1:32:39, 5.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:40:53.516\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 2025/3000 [2:51:15<1:27:57, 5.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:40:58.268\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 2026/3000 [2:51:20<1:26:53, 5.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:41:03.480\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 2027/3000 [2:51:26<1:26:17, 5.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:41:08.729\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 2028/3000 [2:51:31<1:27:13, 5.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:41:14.261\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 2029/3000 [2:51:36<1:24:16, 5.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:41:19.054\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 2030/3000 [2:51:41<1:23:22, 5.16s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:41:24.095\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 2031/3000 [2:51:45<1:19:29, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:41:28.468\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 2032/3000 [2:51:50<1:17:37, 4.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:41:33.020\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 2033/3000 [2:51:55<1:19:46, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:41:38.293\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 2034/3000 [2:52:00<1:18:33, 4.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:41:43.008\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 2035/3000 [2:52:04<1:16:50, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:41:47.550\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 2036/3000 [2:52:09<1:15:18, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:41:52.024\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 2037/3000 [2:52:14<1:18:10, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:41:57.325\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 2038/3000 [2:52:18<1:13:15, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:42:01.190\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 2039/3000 [2:52:22<1:12:48, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:42:05.679\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 2040/3000 [2:52:27<1:11:35, 4.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:42:09.990\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 2041/3000 [2:52:31<1:11:37, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:42:14.485\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 2042/3000 [2:52:36<1:11:21, 4.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:42:18.928\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 2043/3000 [2:52:40<1:11:21, 4.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:42:23.411\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 2044/3000 [2:52:45<1:11:37, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:42:27.956\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 2045/3000 [2:52:50<1:12:50, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:42:32.721\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 2046/3000 [2:52:56<1:20:30, 5.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:42:38.922\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 2047/3000 [2:53:00<1:18:56, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 1.0}\n", "\u001b[32m2026-01-05 12:42:43.674\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 2048/3000 [2:53:05<1:15:30, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:42:47.938\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 2049/3000 [2:53:09<1:12:13, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:42:52.024\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 2050/3000 [2:53:13<1:09:12, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:42:55.962\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 2051/3000 [2:53:17<1:07:06, 4.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:42:59.907\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 2052/3000 [2:53:21<1:04:52, 4.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:43:03.692\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 2053/3000 [2:53:24<1:04:14, 4.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:43:07.680\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 2054/3000 [2:53:29<1:04:45, 4.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": 
[ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:43:11.874\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 68%|██████▊ | 2055/3000 [2:53:33<1:05:25, 4.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:43:16.136\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▊ | 2056/3000 [2:53:38<1:07:43, 4.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:43:20.793\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▊ | 2057/3000 [2:53:41<1:04:54, 4.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:43:24.513\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▊ | 2058/3000 [2:53:46<1:06:05, 4.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:43:28.910\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▊ | 2059/3000 [2:53:50<1:07:48, 4.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:43:33.501\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▊ | 2060/3000 [2:53:55<1:10:10, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:43:38.341\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▊ | 2061/3000 [2:54:00<1:13:42, 4.71s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:43:43.589\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▊ | 2062/3000 [2:54:04<1:10:20, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:43:47.598\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 2063/3000 [2:54:09<1:10:13, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:43:52.088\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 2064/3000 [2:54:13<1:08:55, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:43:56.325\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 2065/3000 [2:54:17<1:08:06, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:44:00.585\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 2066/3000 [2:54:22<1:08:53, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:44:05.136\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 2067/3000 [2:54:26<1:07:54, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:44:09.368\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 2068/3000 [2:54:30<1:07:18, 4.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:44:13.623\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 2069/3000 [2:54:35<1:07:20, 4.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:44:17.978\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 2070/3000 [2:54:40<1:10:15, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:44:22.962\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 2071/3000 [2:54:45<1:11:30, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:44:27.779\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 2072/3000 [2:54:49<1:08:33, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:44:31.778\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 2073/3000 [2:54:52<1:04:08, 4.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:44:35.273\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 2074/3000 [2:54:57<1:08:10, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:44:40.312\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 2075/3000 [2:55:01<1:06:06, 4.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:44:44.299\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 2076/3000 [2:55:05<1:05:47, 4.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:44:48.532\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 2077/3000 [2:55:09<1:04:42, 4.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:44:52.586\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 2078/3000 [2:55:14<1:06:38, 4.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 1.0}\n", "\u001b[32m2026-01-05 12:44:57.228\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 2079/3000 [2:55:18<1:06:58, 4.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:45:01.652\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 2080/3000 [2:55:23<1:06:36, 4.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:45:05.950\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 2081/3000 [2:55:27<1:04:15, 4.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:45:09.800\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 2082/3000 [2:55:31<1:05:15, 4.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:45:14.229\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 2083/3000 [2:55:35<1:04:09, 4.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:45:18.268\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 69%|██████▉ | 2084/3000 [2:55:39<1:03:07, 4.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:45:22.257\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 2085/3000 [2:55:44<1:05:12, 4.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": 
[ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:45:26.862\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 2086/3000 [2:55:48<1:06:20, 4.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:45:31.403\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 2087/3000 [2:55:52<1:04:26, 4.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:45:35.356\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 2088/3000 [2:55:56<1:04:42, 4.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:45:39.665\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 2089/3000 [2:56:01<1:05:21, 4.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:45:44.080\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 2090/3000 [2:56:05<1:02:52, 4.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:45:47.854\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 2091/3000 [2:56:09<1:01:26, 4.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:45:51.699\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 2092/3000 [2:56:13<1:01:52, 4.09s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:45:55.867\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 2093/3000 [2:56:17<1:03:28, 4.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:46:00.324\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 2094/3000 [2:56:21<1:01:24, 4.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:46:04.082\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 2095/3000 [2:56:25<1:02:41, 4.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:46:08.448\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 2096/3000 [2:56:29<1:01:30, 4.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:46:12.358\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 2097/3000 [2:56:33<1:01:46, 4.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:46:16.513\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 2098/3000 [2:56:38<1:03:17, 4.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:46:20.969\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|██████▉ | 2099/3000 [2:56:42<1:03:35, 4.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:46:25.261\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 2100/3000 [2:56:46<1:02:56, 4.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:46:29.367\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 2101/3000 [2:56:50<1:02:50, 4.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:46:33.555\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 2102/3000 [2:56:55<1:04:01, 4.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:46:38.029\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 2103/3000 [2:56:59<1:04:30, 4.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:46:42.429\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 2104/3000 [2:57:03<1:03:37, 4.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:46:46.563\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 2105/3000 [2:57:09<1:09:21, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:46:52.121\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 2106/3000 [2:57:13<1:07:37, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:46:56.401\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 2107/3000 [2:57:17<1:04:56, 4.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:47:00.354\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 2108/3000 [2:57:21<1:02:55, 4.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:47:04.282\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 2109/3000 [2:57:26<1:04:47, 4.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 1.0}\n", "\u001b[32m2026-01-05 12:47:08.950\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 2110/3000 [2:57:32<1:11:03, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:47:14.739\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 2111/3000 [2:57:36<1:08:36, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:47:18.995\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 2112/3000 [2:57:41<1:11:39, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:47:24.330\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 2113/3000 [2:57:46<1:09:32, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:47:28.712\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 2114/3000 [2:57:50<1:07:02, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:47:32.869\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 70%|███████ | 2115/3000 [2:57:54<1:06:30, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:47:37.305\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 2116/3000 [2:57:59<1:06:36, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": 
[ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:47:41.855\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 2117/3000 [2:58:03<1:07:51, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:47:46.674\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 2118/3000 [2:58:08<1:06:06, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:47:50.905\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 2119/3000 [2:58:12<1:05:34, 4.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:47:55.301\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 2120/3000 [2:58:16<1:03:13, 4.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:47:59.250\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 2121/3000 [2:58:21<1:03:57, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:48:03.743\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 2122/3000 [2:58:25<1:05:00, 4.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:48:08.366\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 2123/3000 [2:58:30<1:06:01, 4.52s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:48:13.056\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 2124/3000 [2:58:36<1:13:07, 5.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:48:19.210\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 2125/3000 [2:58:41<1:12:33, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:48:24.110\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 2126/3000 [2:58:45<1:09:40, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:48:28.445\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 2127/3000 [2:58:49<1:06:36, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:48:32.545\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 2128/3000 [2:58:53<1:02:35, 4.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:48:36.219\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 2129/3000 [2:58:57<1:01:53, 4.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:48:40.383\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 2130/3000 [2:59:03<1:07:29, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:48:45.947\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 2131/3000 [2:59:08<1:08:00, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:48:50.738\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 2132/3000 [2:59:12<1:06:25, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:48:55.088\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 2133/3000 [2:59:16<1:04:48, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:48:59.326\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 2134/3000 [2:59:20<1:03:09, 4.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:49:03.447\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 2135/3000 [2:59:24<1:01:15, 4.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:49:07.398\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 2136/3000 [2:59:28<1:00:30, 4.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:49:11.493\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████ | 2137/3000 [2:59:33<1:03:11, 4.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:49:16.333\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████▏ | 2138/3000 [2:59:38<1:03:23, 4.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:49:20.788\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████▏ | 2139/3000 [2:59:44<1:12:53, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:49:27.424\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████▏ | 2140/3000 [2:59:48<1:08:44, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 1.0}\n", "\u001b[32m2026-01-05 12:49:31.557\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████▏ | 2141/3000 [2:59:53<1:05:47, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:49:35.688\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████▏ | 2142/3000 [2:59:57<1:06:33, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:49:40.480\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████▏ | 2143/3000 [3:00:02<1:06:15, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:49:45.080\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 71%|███████▏ | 2144/3000 [3:00:06<1:02:44, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:49:48.918\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 2145/3000 [3:00:11<1:05:53, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:49:54.070\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 2146/3000 [3:00:16<1:05:59, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:49:58.734\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 2147/3000 [3:00:20<1:05:52, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:50:03.361\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 2148/3000 [3:00:25<1:06:53, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:50:08.250\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 2149/3000 [3:00:30<1:06:14, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:50:12.827\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 2150/3000 [3:00:34<1:05:33, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:50:17.357\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 2151/3000 [3:00:39<1:04:43, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:50:21.804\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 2152/3000 [3:00:43<1:01:47, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:50:25.705\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 2153/3000 [3:00:48<1:05:05, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:50:30.872\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 2154/3000 [3:01:04<1:54:59, 8.16s/it]" ] }, { 
"name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:50:47.299\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 2155/3000 [3:01:09<1:39:16, 7.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:50:51.767\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 2156/3000 [3:01:13<1:26:18, 6.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:50:55.772\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 2157/3000 [3:01:17<1:18:53, 5.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:51:00.170\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 2158/3000 [3:01:22<1:18:19, 5.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:51:05.675\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 2159/3000 [3:01:28<1:16:12, 5.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:51:10.773\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 2160/3000 [3:01:32<1:13:52, 5.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:51:15.676\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 2161/3000 [3:01:37<1:11:35, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:51:20.430\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 2162/3000 [3:01:42<1:10:05, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:51:25.212\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 2163/3000 [3:01:48<1:12:39, 5.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:51:30.864\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 2164/3000 [3:01:52<1:09:32, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:51:35.347\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 2165/3000 [3:01:56<1:06:13, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:51:39.564\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 2166/3000 [3:02:01<1:05:17, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:51:44.117\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 2167/3000 [3:02:06<1:04:55, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:51:48.743\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 2168/3000 [3:02:10<1:05:09, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:51:53.496\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 2169/3000 [3:02:15<1:05:22, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:51:58.265\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 2170/3000 [3:02:20<1:05:53, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:52:03.128\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 2171/3000 [3:02:25<1:05:05, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 1.0}\n", "\u001b[32m2026-01-05 12:52:07.717\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 2172/3000 [3:02:29<1:02:05, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:52:11.725\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 2173/3000 [3:02:33<1:02:46, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:52:16.407\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▏ | 2174/3000 [3:02:37<58:57, 4.28s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:52:20.056\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 72%|███████▎ | 2175/3000 [3:02:41<59:57, 4.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:52:24.599\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 2176/3000 [3:02:46<59:43, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:52:28.920\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 2177/3000 [3:02:50<57:53, 4.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:52:32.842\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 2178/3000 [3:02:54<57:33, 4.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:52:36.997\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 2179/3000 [3:02:58<59:07, 4.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:52:41.596\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 2180/3000 [3:03:03<1:01:01, 4.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:52:46.399\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 2181/3000 [3:03:07<1:00:06, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:52:50.660\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 2182/3000 [3:03:12<1:00:32, 4.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:52:55.185\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 2183/3000 [3:03:17<1:02:08, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:53:00.036\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 2184/3000 [3:03:21<1:00:17, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:53:04.165\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 2185/3000 [3:03:26<1:00:39, 4.47s/it]" ] }, { 
"name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:53:08.708\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 2186/3000 [3:03:30<1:01:52, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:53:13.490\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 2187/3000 [3:03:35<1:00:42, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:53:17.782\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 2188/3000 [3:03:39<1:00:21, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:53:22.193\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 2189/3000 [3:03:43<57:40, 4.27s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:53:26.012\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 2190/3000 [3:03:47<58:35, 4.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:53:30.520\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 2191/3000 [3:03:52<58:16, 4.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:53:34.800\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 2192/3000 [3:03:56<57:20, 4.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:53:38.910\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 2193/3000 [3:04:00<58:13, 4.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:53:43.405\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 2194/3000 [3:04:05<1:00:38, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:53:48.350\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 2195/3000 [3:04:11<1:06:03, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:53:54.230\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 2196/3000 [3:04:16<1:04:35, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:53:58.810\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 2197/3000 [3:04:20<1:03:58, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:54:03.496\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 2198/3000 [3:04:25<1:04:04, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:54:08.319\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 2199/3000 [3:04:32<1:10:58, 5.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:54:14.856\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 2200/3000 [3:04:36<1:06:17, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:54:19.025\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 2201/3000 [3:04:41<1:05:22, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:54:23.789\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 2202/3000 [3:04:46<1:05:16, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 1.0}\n", "\u001b[32m2026-01-05 12:54:28.694\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 2203/3000 [3:04:49<1:00:52, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:54:32.519\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 73%|███████▎ | 2204/3000 [3:04:54<1:02:58, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:54:37.646\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▎ | 2205/3000 [3:04:58<1:00:00, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:54:41.669\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▎ | 2206/3000 [3:05:03<58:48, 4.44s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:54:45.912\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▎ | 2207/3000 [3:05:07<59:15, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:54:50.489\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▎ | 2208/3000 [3:05:11<57:24, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:54:54.523\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▎ | 2209/3000 [3:05:21<1:16:48, 5.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": 
[ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:55:03.796\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▎ | 2210/3000 [3:05:25<1:09:35, 5.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:55:07.820\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▎ | 2211/3000 [3:05:29<1:06:12, 5.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:55:12.271\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▎ | 2212/3000 [3:05:34<1:05:42, 5.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:55:17.199\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 2213/3000 [3:05:39<1:05:21, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:55:22.134\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 2214/3000 [3:05:44<1:04:25, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:55:26.903\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 2215/3000 [3:05:48<1:02:10, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:55:31.266\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 2216/3000 [3:05:53<1:01:22, 4.70s/it]" ] }, { 
"name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:55:35.836\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 2217/3000 [3:05:57<59:57, 4.59s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:55:40.189\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 2218/3000 [3:06:02<1:01:51, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:55:45.292\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 2219/3000 [3:06:07<1:03:31, 4.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:55:50.482\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 2220/3000 [3:06:12<1:02:33, 4.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:55:55.138\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 2221/3000 [3:06:16<1:00:37, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:55:59.475\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 2222/3000 [3:06:21<1:00:12, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:56:04.056\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 2223/3000 [3:06:26<1:02:39, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:56:09.348\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 2224/3000 [3:06:31<1:02:41, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:56:14.217\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 2225/3000 [3:06:35<59:59, 4.64s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:56:18.387\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 2226/3000 [3:06:41<1:04:19, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:56:24.170\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 2227/3000 [3:06:45<1:01:53, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:56:28.551\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 2228/3000 [3:06:50<59:44, 4.64s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:56:32.817\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 2229/3000 [3:06:54<58:14, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:56:37.091\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 2230/3000 [3:06:59<58:26, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:56:41.695\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 2231/3000 [3:07:03<57:50, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:56:46.113\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 2232/3000 [3:07:07<56:37, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:56:50.328\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 2233/3000 [3:07:12<58:39, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
1.0}\n", "\u001b[32m2026-01-05 12:56:55.302\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 2234/3000 [3:07:16<57:23, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:56:59.580\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 74%|███████▍ | 2235/3000 [3:07:22<59:53, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:57:04.747\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 2236/3000 [3:07:26<58:57, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:57:09.220\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: 
['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 2237/3000 [3:07:31<59:07, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:57:13.914\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 2238/3000 [3:07:34<54:43, 4.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:57:17.430\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 2239/3000 [3:07:39<56:11, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:57:22.143\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 2240/3000 [3:07:44<57:02, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 
0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:57:26.818\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 2241/3000 [3:07:48<54:42, 4.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:57:30.725\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 2242/3000 [3:07:52<54:42, 4.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:57:35.071\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 2243/3000 [3:07:58<59:43, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:57:40.744\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow 
contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 2244/3000 [3:08:02<59:25, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:57:45.420\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 2245/3000 [3:08:07<57:53, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:57:49.753\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 2246/3000 [3:08:11<56:54, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:57:54.112\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 2247/3000 [3:08:16<59:41, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:57:59.401\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 2248/3000 [3:08:20<55:32, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:58:03.075\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▍ | 2249/3000 [3:08:24<53:08, 4.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:58:06.888\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 2250/3000 [3:08:29<55:46, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:58:11.855\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 2251/3000 [3:08:33<55:41, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:58:16.315\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 2252/3000 [3:08:38<57:11, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:58:21.197\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 2253/3000 [3:08:43<57:10, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:58:25.801\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 2254/3000 [3:08:47<56:59, 4.58s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:58:30.366\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 2255/3000 [3:08:52<56:11, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:58:34.754\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 2256/3000 [3:08:56<56:38, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:58:39.421\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 2257/3000 [3:09:01<55:42, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:58:43.758\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 2258/3000 [3:09:05<53:57, 4.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:58:47.804\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 2259/3000 [3:09:09<53:37, 4.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:58:52.096\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 2260/3000 [3:09:13<52:00, 4.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:58:56.023\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 2261/3000 [3:09:17<51:25, 4.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:59:00.102\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 2262/3000 [3:09:21<50:05, 4.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:59:03.932\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 2263/3000 [3:09:24<48:38, 3.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:59:07.631\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 75%|███████▌ | 2264/3000 [3:09:28<47:51, 3.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:59:11.395\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 2265/3000 [3:09:34<54:08, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:59:17.023\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 2266/3000 [3:09:38<53:58, 4.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:59:21.419\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 2267/3000 [3:09:43<56:50, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:59:26.635\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 2268/3000 [3:09:47<53:26, 4.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:59:30.379\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 2269/3000 [3:09:51<52:39, 4.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:59:34.564\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 2270/3000 [3:09:55<51:00, 4.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:59:38.456\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 2271/3000 [3:10:00<53:11, 4.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
1.0}\n", "\u001b[32m2026-01-05 12:59:43.268\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 2272/3000 [3:10:04<51:26, 4.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:59:47.182\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 2273/3000 [3:10:08<50:15, 4.15s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 12:59:51.118\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 2274/3000 [3:10:12<49:30, 4.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 12:59:55.078\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: 
['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 2275/3000 [3:10:17<52:43, 4.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:00:00.074\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 2276/3000 [3:10:21<51:58, 4.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:00:04.249\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 2277/3000 [3:10:26<53:20, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:00:08.954\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 2278/3000 [3:10:30<53:23, 4.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 
0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:00:13.418\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 2279/3000 [3:10:35<53:32, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:00:17.914\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 2280/3000 [3:10:39<53:25, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:00:22.361\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 2281/3000 [3:10:43<51:01, 4.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:00:26.167\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow 
contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 2282/3000 [3:10:48<52:41, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:00:30.906\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 2283/3000 [3:10:52<51:42, 4.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:00:35.057\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 2284/3000 [3:10:56<52:34, 4.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:00:39.647\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 2285/3000 [3:11:01<51:21, 4.31s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:00:43.733\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 2286/3000 [3:11:04<49:27, 4.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:00:47.530\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▌ | 2287/3000 [3:11:09<50:16, 4.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:00:51.936\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▋ | 2288/3000 [3:11:14<54:04, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:00:57.254\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▋ | 2289/3000 [3:11:19<55:51, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:01:02.335\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▋ | 2290/3000 [3:11:24<57:36, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:01:07.560\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▋ | 2291/3000 [3:11:30<59:09, 5.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:01:12.888\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▋ | 2292/3000 [3:11:34<57:13, 4.85s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:01:17.375\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▋ | 2293/3000 [3:11:38<54:25, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:01:21.454\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▋ | 2294/3000 [3:11:43<54:28, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:01:26.109\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 76%|███████▋ | 2295/3000 [3:11:48<54:15, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:01:30.698\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 2296/3000 [3:11:51<51:50, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:01:34.653\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 2297/3000 [3:11:56<50:51, 4.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:01:38.813\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 2298/3000 [3:12:00<49:49, 4.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:01:42.878\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 2299/3000 [3:12:04<49:50, 4.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:01:47.162\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 2300/3000 [3:12:08<49:22, 4.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:01:51.315\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 2301/3000 [3:12:14<53:34, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:01:56.768\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 2302/3000 [3:12:18<53:38, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:02:01.411\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 2303/3000 [3:12:23<54:27, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:02:06.279\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 2304/3000 [3:12:27<53:17, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:02:10.654\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 2305/3000 [3:12:32<53:49, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:02:15.422\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 2306/3000 [3:12:37<55:03, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:02:20.449\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 2307/3000 [3:12:42<53:39, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:02:24.827\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 2308/3000 [3:12:46<52:13, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:02:29.079\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 2309/3000 [3:12:50<51:09, 4.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
0.0}\n", "\u001b[32m2026-01-05 13:02:33.319\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 2310/3000 [3:12:55<52:28, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:02:38.167\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 2311/3000 [3:12:59<50:07, 4.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:02:42.069\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 2312/3000 [3:13:04<52:26, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:02:47.128\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: 
['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 2313/3000 [3:13:08<51:56, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:02:51.580\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 2314/3000 [3:13:13<50:34, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:02:55.740\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 2315/3000 [3:13:17<51:17, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:03:00.395\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 2316/3000 [3:13:25<1:01:19, 5.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 
0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:03:07.842\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 2317/3000 [3:13:29<58:17, 5.12s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:03:12.359\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 2318/3000 [3:13:33<55:01, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:03:16.549\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 2319/3000 [3:13:38<54:01, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:03:21.117\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow 
contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 2320/3000 [3:13:42<52:59, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:03:25.597\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 2321/3000 [3:13:47<53:27, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:03:30.432\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 2322/3000 [3:13:52<51:54, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:03:34.725\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 2323/3000 [3:13:56<50:39, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:03:38.970\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 77%|███████▋ | 2324/3000 [3:14:00<49:54, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:03:43.262\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 2325/3000 [3:14:04<49:42, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:03:47.654\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 2326/3000 [3:14:09<49:03, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:03:51.900\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 2327/3000 [3:14:13<47:49, 4.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:03:55.922\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 2328/3000 [3:14:17<47:23, 4.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:04:00.077\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 2329/3000 [3:14:21<47:06, 4.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:04:04.245\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 2330/3000 [3:14:26<47:48, 4.28s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:04:08.686\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 2331/3000 [3:14:30<46:52, 4.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:04:12.711\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 2332/3000 [3:14:34<47:44, 4.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:04:17.194\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 2333/3000 [3:14:40<51:57, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:04:22.767\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 2334/3000 [3:14:44<51:42, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:04:27.388\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 2335/3000 [3:14:48<50:06, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:04:31.589\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 2336/3000 [3:14:52<48:28, 4.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:04:35.640\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 2337/3000 [3:14:56<47:08, 4.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:04:39.639\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 2338/3000 [3:15:01<47:23, 4.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:04:44.004\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 2339/3000 [3:15:05<47:01, 4.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:04:48.210\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 2340/3000 [3:15:09<45:08, 4.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:04:51.931\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 2341/3000 [3:15:14<48:53, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:04:57.191\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 2342/3000 [3:15:19<50:49, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:05:02.254\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 2343/3000 [3:15:23<48:09, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:05:06.098\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 2344/3000 [3:15:27<47:38, 4.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:05:10.360\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 2345/3000 [3:15:32<47:45, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:05:14.776\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 2346/3000 [3:15:37<49:27, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:05:19.695\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 2347/3000 [3:15:41<48:37, 4.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
0.0}\n", "\u001b[32m2026-01-05 13:05:23.998\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 2348/3000 [3:15:45<46:26, 4.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:05:27.819\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 2349/3000 [3:15:49<47:17, 4.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:05:32.377\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 2350/3000 [3:15:53<46:55, 4.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:05:36.647\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: 
['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 2351/3000 [3:15:58<49:07, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:05:41.676\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 2352/3000 [3:16:03<49:05, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:05:46.229\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 2353/3000 [3:16:08<48:48, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:05:50.712\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 2354/3000 [3:16:12<47:55, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 
0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:05:54.989\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 78%|███████▊ | 2355/3000 [3:16:16<48:10, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:05:59.541\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▊ | 2356/3000 [3:16:21<47:32, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:06:03.848\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▊ | 2357/3000 [3:16:25<46:18, 4.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:06:07.917\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow 
contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▊ | 2358/3000 [3:16:30<47:57, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:06:12.775\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▊ | 2359/3000 [3:16:34<47:36, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:06:17.173\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▊ | 2360/3000 [3:16:39<49:20, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:06:22.192\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▊ | 2361/3000 [3:16:44<49:08, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:06:26.777\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▊ | 2362/3000 [3:16:49<50:22, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:06:31.804\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 2363/3000 [3:16:53<49:50, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:06:36.400\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 2364/3000 [3:16:58<49:57, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:06:41.154\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 2365/3000 [3:17:03<49:42, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:06:45.815\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 2366/3000 [3:17:09<53:44, 5.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:06:51.809\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 2367/3000 [3:17:14<53:17, 5.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:06:56.780\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 2368/3000 [3:17:18<50:32, 4.80s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:07:00.987\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 2369/3000 [3:17:22<49:56, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:07:05.620\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 2370/3000 [3:17:27<50:05, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:07:10.442\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 2371/3000 [3:17:31<47:04, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:07:14.277\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 2372/3000 [3:17:36<47:37, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:07:18.966\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 2373/3000 [3:17:40<47:22, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:07:23.459\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 2374/3000 [3:17:45<48:23, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:07:28.344\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 2375/3000 [3:17:50<48:44, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:07:33.119\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 2376/3000 [3:17:56<52:49, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:07:39.131\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 2377/3000 [3:18:01<54:00, 5.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:07:44.618\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 2378/3000 [3:18:06<51:01, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:07:48.889\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 2379/3000 [3:18:10<49:51, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:07:53.463\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 2380/3000 [3:18:15<48:26, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:07:57.848\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 2381/3000 [3:18:19<46:31, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:08:01.940\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 2382/3000 [3:18:23<46:46, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:08:06.553\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 2383/3000 [3:18:30<51:35, 5.02s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:08:12.684\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 79%|███████▉ | 2384/3000 [3:18:34<48:35, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:08:16.753\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 2385/3000 [3:18:38<47:18, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 
1.0}\n", "\u001b[32m2026-01-05 13:08:21.093\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 2386/3000 [3:18:43<47:20, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:08:25.747\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 2387/3000 [3:18:48<48:44, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:08:30.855\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 2388/3000 [3:18:52<46:23, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:08:34.880\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: 
['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 2389/3000 [3:18:56<45:54, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:08:39.298\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 2390/3000 [3:19:00<45:12, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:08:43.601\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 2391/3000 [3:19:05<44:18, 4.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:08:47.774\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 2392/3000 [3:19:11<50:40, 5.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 
0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:08:54.256\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 2393/3000 [3:19:15<48:26, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:08:58.553\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 2394/3000 [3:19:20<47:57, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:09:03.207\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 2395/3000 [3:19:24<47:00, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:09:07.666\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow 
contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 2396/3000 [3:19:29<47:18, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:09:12.453\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 2397/3000 [3:19:35<49:24, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:09:17.878\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 2398/3000 [3:19:39<48:34, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:09:22.540\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|███████▉ | 2399/3000 [3:19:44<46:33, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:09:26.739\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2400/3000 [3:19:49<47:37, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:09:31.769\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2401/3000 [3:19:53<45:29, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:09:35.847\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2402/3000 [3:19:57<43:35, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:09:39.792\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2403/3000 [3:20:01<43:14, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:09:44.075\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2404/3000 [3:20:05<42:04, 4.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:09:48.054\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2405/3000 [3:20:09<41:24, 4.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:09:52.089\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2406/3000 [3:20:13<41:19, 4.17s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:09:56.260\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2407/3000 [3:20:17<40:38, 4.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:10:00.225\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2408/3000 [3:20:21<41:26, 4.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:10:04.632\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2409/3000 [3:20:27<46:02, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:10:10.415\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2410/3000 [3:20:32<46:55, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:10:15.411\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2411/3000 [3:20:36<44:39, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:10:19.442\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2412/3000 [3:20:41<45:42, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:10:24.375\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2413/3000 [3:20:46<44:56, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:10:28.803\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2414/3000 [3:20:50<44:04, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:10:33.126\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 80%|████████ | 2415/3000 [3:20:55<45:59, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:10:38.321\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2416/3000 [3:21:00<45:34, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:10:42.924\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2417/3000 [3:21:04<45:37, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:10:47.649\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2418/3000 [3:21:10<46:55, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:10:52.816\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2419/3000 [3:21:14<45:36, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:10:57.230\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2420/3000 [3:21:35<1:31:34, 9.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:11:17.819\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2421/3000 [3:21:39<1:18:03, 8.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:11:22.675\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2422/3000 [3:21:46<1:12:24, 7.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:11:28.855\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2423/3000 [3:21:51<1:04:46, 6.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 1.0}\n", "\u001b[32m2026-01-05 13:11:33.773\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2424/3000 [3:21:55<57:16, 5.97s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:11:37.944\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2425/3000 [3:22:00<53:45, 5.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:11:42.719\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2426/3000 [3:22:04<51:22, 5.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:11:47.532\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated 
nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2427/3000 [3:22:09<49:14, 5.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:11:52.190\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2428/3000 [3:22:14<48:50, 5.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:11:57.234\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2429/3000 [3:22:19<48:09, 5.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:12:02.151\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2430/3000 [3:22:24<47:00, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics 
{'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:12:06.836\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2431/3000 [3:22:28<46:34, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:12:11.661\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2432/3000 [3:22:33<45:01, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:12:16.053\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2433/3000 [3:22:37<44:07, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:12:20.521\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe 
workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2434/3000 [3:22:42<44:25, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:12:25.326\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2435/3000 [3:22:47<44:14, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:12:29.997\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2436/3000 [3:22:52<45:23, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:12:35.132\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████ | 2437/3000 [3:22:57<45:01, 4.80s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:12:39.859\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████▏ | 2438/3000 [3:23:01<44:34, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:12:44.524\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████▏ | 2439/3000 [3:23:06<44:43, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:12:49.369\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████▏ | 2440/3000 [3:23:11<43:38, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:12:53.789\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m 
- \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████▏ | 2441/3000 [3:23:15<42:53, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:12:58.228\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████▏ | 2442/3000 [3:23:21<45:49, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:13:03.908\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████▏ | 2443/3000 [3:23:26<46:12, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:13:09.002\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 81%|████████▏ | 2444/3000 [3:23:30<44:59, 4.86s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:13:13.575\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2445/3000 [3:23:35<43:16, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:13:17.842\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2446/3000 [3:23:39<42:08, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:13:22.135\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2447/3000 [3:23:43<40:03, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:13:25.972\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2448/3000 [3:23:47<40:40, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:13:30.570\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2449/3000 [3:23:53<44:24, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:13:36.376\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2450/3000 [3:23:59<46:30, 5.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:13:42.000\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2451/3000 [3:24:04<46:59, 5.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:13:47.281\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2452/3000 [3:24:08<44:41, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:13:51.609\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2453/3000 [3:24:13<42:30, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:13:55.731\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2454/3000 [3:24:17<41:31, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:14:00.067\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2455/3000 [3:24:23<45:50, 5.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:14:06.240\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2456/3000 [3:24:29<46:58, 5.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:14:11.733\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2457/3000 [3:24:33<46:00, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:14:16.594\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2458/3000 [3:24:38<43:50, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:14:20.908\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2459/3000 [3:24:42<43:13, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:14:25.563\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2460/3000 [3:24:47<42:37, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:14:30.163\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2461/3000 [3:24:52<42:16, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 1.0}\n", "\u001b[32m2026-01-05 13:14:34.801\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2462/3000 [3:24:56<41:25, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:14:39.218\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2463/3000 [3:25:01<42:17, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:14:44.191\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2464/3000 [3:25:05<40:24, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:14:48.242\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2465/3000 [3:25:09<39:02, 4.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:14:52.286\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2466/3000 [3:25:14<39:34, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:14:56.892\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2467/3000 [3:25:18<38:45, 4.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:15:01.058\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2468/3000 [3:25:24<42:16, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": 
[ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:15:06.772\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2469/3000 [3:25:28<42:34, 4.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:15:11.681\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2470/3000 [3:25:33<41:21, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:15:16.063\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2471/3000 [3:25:38<41:38, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:15:20.880\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2472/3000 [3:25:42<41:27, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:15:25.564\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2473/3000 [3:25:47<39:53, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:15:29.711\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▏ | 2474/3000 [3:25:51<38:38, 4.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:15:33.809\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 82%|████████▎ | 2475/3000 [3:25:56<40:01, 4.57s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:15:38.772\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2476/3000 [3:26:00<39:58, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:15:43.352\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2477/3000 [3:26:04<38:01, 4.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:15:47.213\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2478/3000 [3:26:09<39:10, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:15:52.046\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2479/3000 [3:26:14<39:38, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:15:56.756\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2480/3000 [3:26:18<39:52, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:16:01.442\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2481/3000 [3:26:23<39:21, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:16:05.871\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2482/3000 [3:26:31<49:49, 5.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:16:14.491\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2483/3000 [3:26:36<46:06, 5.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:16:18.860\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2484/3000 [3:26:40<42:45, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:16:22.949\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2485/3000 [3:26:45<42:22, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:16:27.806\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2486/3000 [3:26:49<40:28, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:16:32.036\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2487/3000 [3:26:54<40:19, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:16:36.733\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2488/3000 [3:26:58<39:54, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:16:41.315\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2489/3000 [3:27:03<39:35, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:16:45.901\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2490/3000 [3:27:09<43:33, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:16:52.138\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2491/3000 [3:27:14<43:02, 5.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:16:57.089\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2492/3000 [3:27:19<42:50, 5.06s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 0.0}\n", "\u001b[32m2026-01-05 13:17:02.120\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2493/3000 [3:27:23<39:47, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:17:06.006\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2494/3000 [3:27:27<39:28, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:17:10.625\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2495/3000 [3:27:33<41:58, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:17:16.324\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2496/3000 [3:27:38<41:36, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:17:21.198\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2497/3000 [3:27:43<40:31, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:17:25.754\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2498/3000 [3:27:47<39:15, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:17:30.119\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2499/3000 [3:27:51<36:21, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": 
[ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:17:33.684\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2500/3000 [3:27:55<35:55, 4.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:17:37.893\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2501/3000 [3:27:59<36:08, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:17:42.320\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2502/3000 [3:28:05<40:16, 4.85s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:17:48.356\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2503/3000 [3:28:11<42:10, 5.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:17:54.001\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 83%|████████▎ | 2504/3000 [3:28:16<41:18, 5.00s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:17:58.778\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▎ | 2505/3000 [3:28:21<42:48, 5.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:18:04.418\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▎ | 2506/3000 [3:28:26<41:37, 5.06s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:18:09.164\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▎ | 2507/3000 [3:28:33<46:37, 5.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:18:16.282\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▎ | 2508/3000 [3:28:39<46:13, 5.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:18:21.828\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▎ | 2509/3000 [3:28:43<42:42, 5.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:18:26.074\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▎ | 2510/3000 [3:28:47<39:44, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:18:30.116\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▎ | 2511/3000 [3:28:52<39:43, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:18:35.011\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▎ | 2512/3000 [3:28:56<37:35, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:18:39.044\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2513/3000 [3:29:00<36:24, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:18:43.211\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2514/3000 [3:29:05<38:32, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:18:48.603\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2515/3000 [3:29:10<38:03, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:18:53.194\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2516/3000 [3:29:14<37:12, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:18:57.584\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2517/3000 [3:29:19<36:35, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:19:01.971\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2518/3000 [3:29:23<34:38, 4.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:19:05.741\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2519/3000 [3:29:27<35:18, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:19:10.359\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2520/3000 [3:29:32<36:15, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:19:15.194\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2521/3000 [3:29:36<35:37, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:19:19.489\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2522/3000 [3:29:40<34:37, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:19:23.568\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2523/3000 [3:29:45<34:31, 4.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 1.0}\n", "\u001b[32m2026-01-05 13:19:27.900\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2524/3000 [3:29:50<37:10, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:19:33.391\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2525/3000 [3:29:55<38:29, 4.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:19:38.659\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2526/3000 [3:30:01<39:20, 4.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:19:43.916\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2527/3000 [3:30:06<38:47, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:19:48.700\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2528/3000 [3:30:10<37:55, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:19:53.286\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2529/3000 [3:30:14<36:22, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:19:57.481\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2530/3000 [3:30:19<36:29, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": 
[ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:20:02.198\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2531/3000 [3:30:23<35:40, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:20:06.543\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2532/3000 [3:30:28<35:25, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:20:11.033\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2533/3000 [3:30:32<34:57, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:20:15.405\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2534/3000 [3:30:37<36:06, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:20:20.423\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 84%|████████▍ | 2535/3000 [3:30:42<36:35, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:20:25.313\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2536/3000 [3:30:47<36:11, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:20:29.900\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2537/3000 [3:30:51<36:08, 4.68s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:20:34.591\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2538/3000 [3:30:56<34:49, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:20:38.734\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2539/3000 [3:31:01<35:51, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:20:43.740\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2540/3000 [3:31:05<34:24, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:20:47.810\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2541/3000 [3:31:09<33:13, 4.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:20:51.812\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2542/3000 [3:31:13<33:59, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:20:56.521\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2543/3000 [3:31:19<35:37, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:21:01.726\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2544/3000 [3:31:23<34:21, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:21:05.879\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2545/3000 [3:31:27<34:43, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:21:10.596\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2546/3000 [3:31:32<34:41, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:21:15.190\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2547/3000 [3:31:38<37:16, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:21:20.952\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2548/3000 [3:31:42<36:06, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:21:25.408\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▍ | 2549/3000 [3:31:48<38:33, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:21:31.320\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2550/3000 [3:31:53<37:02, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:21:35.820\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2551/3000 [3:31:57<35:21, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:21:40.040\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2552/3000 [3:32:02<36:52, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:21:45.479\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2553/3000 [3:32:07<35:20, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:21:49.772\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2554/3000 [3:32:11<35:30, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 1.0}\n", "\u001b[32m2026-01-05 13:21:54.623\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2555/3000 [3:32:16<34:04, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:21:58.788\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2556/3000 [3:32:21<35:05, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:22:03.881\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2557/3000 [3:32:25<34:58, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:22:08.603\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2558/3000 [3:32:30<34:15, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:22:13.053\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2559/3000 [3:32:34<33:26, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:22:17.367\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2560/3000 [3:32:38<32:32, 4.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:22:21.545\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2561/3000 [3:32:42<31:47, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": 
[ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:22:25.676\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2562/3000 [3:32:47<31:45, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:22:30.037\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2563/3000 [3:32:51<31:02, 4.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:22:34.093\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 85%|████████▌ | 2564/3000 [3:32:56<31:57, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:22:38.810\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2565/3000 [3:33:00<31:43, 4.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:22:43.131\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2566/3000 [3:33:04<30:35, 4.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:22:47.017\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2567/3000 [3:33:08<29:53, 4.14s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:22:50.959\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2568/3000 [3:33:12<29:39, 4.12s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:22:55.025\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2569/3000 [3:33:16<30:17, 4.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:22:59.468\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2570/3000 [3:33:20<29:25, 4.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:23:03.317\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2571/3000 [3:33:24<29:24, 4.11s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:23:07.443\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2572/3000 [3:33:29<31:01, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:23:12.346\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2573/3000 [3:33:34<31:18, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:23:16.864\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2574/3000 [3:33:38<30:14, 4.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:23:20.793\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2575/3000 [3:33:42<30:15, 4.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:23:25.092\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2576/3000 [3:33:46<30:43, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:23:29.619\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2577/3000 [3:33:51<31:47, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:23:34.508\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2578/3000 [3:33:55<30:57, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:23:38.655\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2579/3000 [3:34:00<31:22, 4.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:23:43.291\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2580/3000 [3:34:04<29:46, 4.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:23:47.040\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2581/3000 [3:34:10<33:20, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:23:53.030\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2582/3000 [3:34:14<32:35, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:23:57.480\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2583/3000 [3:34:19<31:45, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:24:01.801\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2584/3000 [3:34:23<31:53, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:24:06.470\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2585/3000 [3:34:28<32:31, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 1.0}\n", "\u001b[32m2026-01-05 13:24:11.409\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2586/3000 [3:34:33<32:23, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:24:16.084\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▌ | 2587/3000 [3:34:37<32:05, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:24:20.669\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▋ | 2588/3000 [3:34:42<31:45, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:24:25.207\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▋ | 2589/3000 [3:34:46<30:56, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:24:29.477\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▋ | 2590/3000 [3:34:51<31:36, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:24:34.350\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▋ | 2591/3000 [3:34:56<31:22, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:24:38.899\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▋ | 2592/3000 [3:35:00<29:42, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": 
[ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:24:42.725\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▋ | 2593/3000 [3:35:03<28:43, 4.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:24:46.642\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▋ | 2594/3000 [3:35:08<28:19, 4.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:24:50.719\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 86%|████████▋ | 2595/3000 [3:35:12<28:13, 4.18s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:24:54.885\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2596/3000 [3:35:16<27:44, 4.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:24:58.867\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2597/3000 [3:35:22<32:27, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:25:05.363\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2598/3000 [3:35:27<31:36, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:25:09.810\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2599/3000 [3:35:31<31:13, 4.67s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:25:14.373\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2600/3000 [3:35:36<30:30, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:25:18.732\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2601/3000 [3:35:40<30:16, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:25:23.223\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2602/3000 [3:35:44<29:43, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:25:27.541\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2603/3000 [3:35:49<29:03, 4.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:25:31.720\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2604/3000 [3:35:53<29:20, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:25:36.295\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2605/3000 [3:35:58<30:47, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:25:41.514\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2606/3000 [3:36:03<30:22, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:25:46.015\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2607/3000 [3:36:08<31:01, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:25:51.015\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2608/3000 [3:36:12<30:47, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:25:55.672\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2609/3000 [3:36:16<29:06, 4.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:25:59.562\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2610/3000 [3:36:21<29:20, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:26:04.186\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2611/3000 [3:36:26<29:19, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:26:08.732\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2612/3000 [3:36:30<29:22, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:26:13.323\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2613/3000 [3:36:35<29:23, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:26:17.910\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2614/3000 [3:36:39<28:25, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:26:22.007\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2615/3000 [3:36:43<28:25, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:26:26.466\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2616/3000 [3:36:47<26:47, 4.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 1.0}\n", "\u001b[32m2026-01-05 13:26:30.085\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2617/3000 [3:36:51<26:53, 4.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:26:34.361\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2618/3000 [3:36:56<27:20, 4.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:26:38.844\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2619/3000 [3:37:00<28:05, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:26:43.573\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2620/3000 [3:37:04<26:59, 4.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:26:47.453\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2621/3000 [3:37:09<27:12, 4.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:26:51.869\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2622/3000 [3:37:14<29:00, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:26:57.165\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2623/3000 [3:37:19<28:56, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": 
[ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:27:01.773\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 87%|████████▋ | 2624/3000 [3:37:23<29:20, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:27:06.639\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2625/3000 [3:37:27<27:55, 4.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:27:10.607\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2626/3000 [3:37:32<27:28, 4.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:27:14.869\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2627/3000 [3:37:36<27:17, 4.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:27:19.222\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2628/3000 [3:37:41<28:01, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:27:24.045\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2629/3000 [3:37:46<29:01, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:27:29.146\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2630/3000 [3:37:50<27:47, 4.51s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:27:33.210\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2631/3000 [3:37:55<27:48, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:27:37.772\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2632/3000 [3:37:59<28:02, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:27:42.463\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2633/3000 [3:38:04<28:01, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:27:47.068\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2634/3000 [3:38:09<28:41, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:27:52.051\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2635/3000 [3:38:14<28:54, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:27:56.915\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2636/3000 [3:38:18<28:22, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:28:01.424\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2637/3000 [3:38:23<28:36, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:28:06.272\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2638/3000 [3:38:28<29:34, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:28:11.573\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2639/3000 [3:38:33<28:22, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:28:15.854\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2640/3000 [3:38:37<27:38, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:28:20.207\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2641/3000 [3:38:41<27:15, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:28:24.643\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2642/3000 [3:38:46<26:48, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:28:28.994\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2643/3000 [3:38:50<25:39, 4.31s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:28:32.884\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2644/3000 [3:38:55<27:03, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:28:38.025\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2645/3000 [3:39:00<27:40, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:28:42.971\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2646/3000 [3:39:04<27:38, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:28:47.673\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2647/3000 [3:39:09<27:44, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 1.0}\n", "\u001b[32m2026-01-05 13:28:52.464\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2648/3000 [3:39:16<31:15, 5.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:28:59.222\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2649/3000 [3:39:21<30:21, 5.19s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:29:04.085\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2650/3000 [3:39:25<28:09, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:29:08.067\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2651/3000 [3:39:30<28:29, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:29:13.132\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2652/3000 [3:39:34<27:20, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:29:17.416\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2653/3000 [3:39:39<27:19, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:29:22.167\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2654/3000 [3:39:43<26:02, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": 
[ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:29:26.199\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 88%|████████▊ | 2655/3000 [3:39:48<27:18, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:29:31.490\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▊ | 2656/3000 [3:39:52<26:16, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:29:35.681\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▊ | 2657/3000 [3:39:57<25:51, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:29:40.064\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▊ | 2658/3000 [3:40:02<26:27, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:29:44.989\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▊ | 2659/3000 [3:40:07<26:29, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:29:49.697\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▊ | 2660/3000 [3:40:11<26:20, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:29:54.315\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▊ | 2661/3000 [3:40:16<26:27, 4.68s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:29:59.080\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▊ | 2662/3000 [3:40:20<25:58, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:30:03.518\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2663/3000 [3:40:25<26:13, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:30:08.325\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2664/3000 [3:40:30<26:43, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:30:13.341\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2665/3000 [3:40:35<26:50, 4.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:30:18.226\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2666/3000 [3:40:40<26:33, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:30:22.909\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2667/3000 [3:40:45<26:44, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:30:27.838\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2668/3000 [3:40:49<25:19, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:30:31.857\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2669/3000 [3:40:53<24:10, 4.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:30:35.784\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2670/3000 [3:40:57<23:25, 4.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:30:39.750\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2671/3000 [3:41:01<23:02, 4.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:30:43.820\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2672/3000 [3:41:05<22:58, 4.20s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:30:48.030\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2673/3000 [3:41:10<23:49, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:30:52.797\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2674/3000 [3:41:14<23:58, 4.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:30:57.304\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2675/3000 [3:41:19<25:16, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:31:02.563\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2676/3000 [3:41:24<25:21, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:31:07.324\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2677/3000 [3:41:28<24:25, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:31:11.493\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2678/3000 [3:41:33<24:08, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 1.0}\n", "\u001b[32m2026-01-05 13:31:15.900\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2679/3000 [3:41:38<24:53, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:31:20.916\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2680/3000 [3:41:42<24:36, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:31:25.435\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2681/3000 [3:41:47<24:54, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:31:30.283\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2682/3000 [3:41:51<24:20, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:31:34.659\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2683/3000 [3:41:56<24:43, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:31:39.549\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 89%|████████▉ | 2684/3000 [3:42:01<24:10, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:31:43.931\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2685/3000 [3:42:05<23:46, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": 
[ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:31:48.317\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2686/3000 [3:42:10<24:19, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:31:53.238\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2687/3000 [3:42:15<24:00, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:31:57.740\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2688/3000 [3:42:19<23:50, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:32:02.282\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2689/3000 [3:42:23<22:32, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:32:06.077\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2690/3000 [3:42:28<23:37, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:32:11.174\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2691/3000 [3:42:32<22:46, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:32:15.242\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2692/3000 [3:42:38<24:36, 4.79s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:32:20.902\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2693/3000 [3:42:42<23:17, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:32:24.894\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2694/3000 [3:42:47<24:08, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:32:30.051\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2695/3000 [3:42:51<23:43, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:32:34.568\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2696/3000 [3:43:04<35:16, 6.96s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:32:46.885\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2697/3000 [3:43:08<30:49, 6.10s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:32:50.987\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2698/3000 [3:43:12<28:02, 5.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:32:55.312\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|████████▉ | 2699/3000 [3:43:17<27:14, 5.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:33:00.414\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2700/3000 [3:43:22<26:03, 5.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:33:05.111\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2701/3000 [3:43:26<24:22, 4.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:33:09.259\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2702/3000 [3:43:31<23:43, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:33:13.766\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2703/3000 [3:43:35<23:39, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:33:18.552\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2704/3000 [3:43:39<22:24, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:33:22.543\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2705/3000 [3:43:45<23:20, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:33:27.764\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2706/3000 [3:43:49<22:40, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:33:32.119\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2707/3000 [3:43:54<23:25, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:33:37.309\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2708/3000 [3:43:58<22:41, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:33:41.663\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2709/3000 [3:44:03<22:11, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 1.0}\n", "\u001b[32m2026-01-05 13:33:46.034\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2710/3000 [3:44:07<21:13, 4.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:33:49.998\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2711/3000 [3:44:11<21:17, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:33:54.487\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2712/3000 [3:44:15<20:46, 4.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:33:58.592\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2713/3000 [3:44:19<20:09, 4.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:34:02.548\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2714/3000 [3:44:24<20:25, 4.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:34:06.998\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 90%|█████████ | 2715/3000 [3:44:28<20:18, 4.28s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:34:11.253\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2716/3000 [3:44:33<21:22, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": 
[ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:34:16.332\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2717/3000 [3:44:37<20:41, 4.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:34:20.411\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2718/3000 [3:44:42<20:31, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:34:24.730\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2719/3000 [3:44:46<20:13, 4.32s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:34:28.940\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2720/3000 [3:44:50<20:11, 4.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:34:33.286\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2721/3000 [3:44:56<22:00, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:34:38.962\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2722/3000 [3:45:00<21:43, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:34:43.556\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2723/3000 [3:45:04<20:52, 4.52s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:34:47.680\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2724/3000 [3:45:09<21:12, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:34:52.500\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2725/3000 [3:45:14<21:14, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:34:57.187\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2726/3000 [3:45:19<21:23, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:35:01.986\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2727/3000 [3:45:23<21:14, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:35:06.617\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2728/3000 [3:45:28<20:42, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:35:10.950\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2729/3000 [3:45:32<20:19, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:35:15.289\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2730/3000 [3:45:36<19:47, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:35:19.449\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2731/3000 [3:45:40<18:57, 4.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:35:23.281\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2732/3000 [3:45:45<19:11, 4.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:35:27.733\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2733/3000 [3:45:49<18:45, 4.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:35:31.765\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2734/3000 [3:45:53<19:02, 4.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:35:36.247\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2735/3000 [3:45:58<20:02, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:35:41.343\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2736/3000 [3:46:02<19:06, 4.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:35:45.233\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████ | 2737/3000 [3:46:07<19:13, 4.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:35:49.720\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████▏| 2738/3000 [3:46:11<19:40, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:35:54.512\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████▏| 2739/3000 [3:46:16<20:24, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:35:59.628\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████▏| 2740/3000 [3:46:21<20:19, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 1.0}\n", "\u001b[32m2026-01-05 13:36:04.324\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████▏| 2741/3000 [3:46:26<19:59, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:36:08.819\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████▏| 2742/3000 [3:46:30<19:52, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:36:13.421\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████▏| 2743/3000 [3:46:35<19:27, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:36:17.775\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 91%|█████████▏| 2744/3000 [3:46:39<19:29, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:36:22.408\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2745/3000 [3:46:44<19:37, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:36:27.141\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2746/3000 [3:46:49<19:30, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:36:31.725\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2747/3000 [3:46:54<19:58, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": 
[ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:36:36.765\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2748/3000 [3:46:58<19:25, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:36:41.132\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2749/3000 [3:47:03<19:31, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:36:45.895\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2750/3000 [3:47:07<19:03, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:36:50.246\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2751/3000 [3:47:12<19:49, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:36:55.501\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2752/3000 [3:47:17<19:50, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:37:00.351\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2753/3000 [3:47:22<19:26, 4.72s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:37:04.893\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2754/3000 [3:47:26<19:26, 4.74s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:37:09.679\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2755/3000 [3:47:32<20:04, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:37:15.003\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2756/3000 [3:47:37<20:17, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:37:20.163\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2757/3000 [3:47:42<19:42, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:37:24.744\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2758/3000 [3:47:46<18:58, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:37:29.074\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2759/3000 [3:47:51<19:11, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:37:34.023\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2760/3000 [3:47:55<18:14, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:37:38.076\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2761/3000 [3:48:00<18:44, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:37:43.119\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2762/3000 [3:48:04<17:39, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:37:46.985\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2763/3000 [3:48:09<18:18, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:37:52.039\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2764/3000 [3:48:14<19:00, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:37:57.334\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2765/3000 [3:48:19<18:51, 4.81s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:38:02.103\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2766/3000 [3:48:23<18:27, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:38:06.642\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2767/3000 [3:48:30<19:54, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:38:12.690\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2768/3000 [3:48:33<18:24, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:38:16.599\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2769/3000 [3:48:38<18:42, 4.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:38:21.682\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2770/3000 [3:48:43<18:10, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:38:26.158\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2771/3000 [3:48:48<18:07, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 1.0}\n", "\u001b[32m2026-01-05 13:38:30.928\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2772/3000 [3:48:52<18:01, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:38:35.655\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2773/3000 [3:48:58<19:14, 5.09s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:38:41.537\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▏| 2774/3000 [3:49:03<18:39, 4.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:38:46.186\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 92%|█████████▎| 2775/3000 [3:49:09<19:14, 5.13s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:38:51.723\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2776/3000 [3:49:13<18:47, 5.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:38:56.527\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2777/3000 [3:49:18<18:22, 4.94s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:39:01.262\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2778/3000 [3:49:23<18:02, 4.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": 
[ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:39:05.980\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2779/3000 [3:49:27<17:08, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:39:10.124\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2780/3000 [3:49:31<16:57, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:39:14.673\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2781/3000 [3:49:37<17:46, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:39:20.117\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2782/3000 [3:49:41<17:16, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:39:24.604\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2783/3000 [3:49:46<16:40, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:39:28.877\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2784/3000 [3:49:50<16:30, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:39:33.411\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2785/3000 [3:49:54<15:49, 4.42s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:39:37.430\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2786/3000 [3:49:58<15:12, 4.27s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:39:41.344\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2787/3000 [3:50:03<15:46, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:39:46.210\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2788/3000 [3:50:07<15:41, 4.44s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:39:50.644\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2789/3000 [3:50:12<16:01, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:39:55.461\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2790/3000 [3:50:17<16:00, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:40:00.087\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2791/3000 [3:50:21<15:19, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:40:04.077\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2792/3000 [3:50:26<15:45, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:40:08.960\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2793/3000 [3:50:31<15:58, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:40:13.789\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2794/3000 [3:50:34<15:01, 4.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:40:17.569\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2795/3000 [3:50:39<14:55, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:40:21.917\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2796/3000 [3:50:43<15:11, 4.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:40:26.624\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2797/3000 [3:50:51<18:28, 5.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:40:34.405\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2798/3000 [3:50:56<17:35, 5.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:40:39.083\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2799/3000 [3:51:00<16:52, 5.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:40:43.679\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2800/3000 [3:51:05<16:12, 4.86s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:40:48.131\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2801/3000 [3:51:10<15:50, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:40:52.704\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2802/3000 [3:51:14<15:10, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 1.0}\n", "\u001b[32m2026-01-05 13:40:56.885\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2803/3000 [3:51:18<14:39, 4.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:41:01.048\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 93%|█████████▎| 2804/3000 [3:51:22<14:10, 4.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:41:05.093\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▎| 2805/3000 [3:51:28<15:25, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:41:10.788\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▎| 2806/3000 [3:51:32<14:44, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:41:14.910\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▎| 2807/3000 [3:51:37<15:18, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:41:20.131\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▎| 2808/3000 [3:51:41<14:40, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:41:24.313\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▎| 2809/3000 [3:51:45<13:52, 4.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": 
[ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:41:28.142\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▎| 2810/3000 [3:51:50<14:41, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:41:33.440\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▎| 2811/3000 [3:51:54<14:14, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:41:37.677\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▎| 2812/3000 [3:51:59<14:32, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:41:42.609\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2813/3000 [3:52:04<14:16, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:41:47.036\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2814/3000 [3:52:09<14:24, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:41:51.852\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2815/3000 [3:52:13<14:16, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:41:56.427\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2816/3000 [3:52:19<14:46, 4.82s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:42:01.696\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2817/3000 [3:52:22<13:41, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:42:05.412\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2818/3000 [3:52:27<14:16, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:42:10.619\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2819/3000 [3:52:32<14:22, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:42:15.533\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2820/3000 [3:52:36<13:36, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:42:19.528\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2821/3000 [3:52:41<13:56, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:42:24.521\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2822/3000 [3:52:45<13:21, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:42:28.631\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2823/3000 [3:52:50<13:22, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:42:33.236\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2824/3000 [3:52:55<13:16, 4.52s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:42:37.734\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2825/3000 [3:52:59<13:25, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:42:42.528\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2826/3000 [3:53:04<13:36, 4.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:42:47.434\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2827/3000 [3:53:08<13:07, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:42:51.660\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2828/3000 [3:53:13<12:52, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:42:56.000\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2829/3000 [3:53:17<12:43, 4.46s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:43:00.405\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2830/3000 [3:53:21<12:16, 4.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:43:04.437\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2831/3000 [3:53:27<13:08, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:43:09.876\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2832/3000 [3:53:32<13:24, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:43:14.957\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2833/3000 [3:53:36<13:10, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 1.0}\n", "\u001b[32m2026-01-05 13:43:19.564\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2834/3000 [3:53:41<13:09, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:43:24.361\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 94%|█████████▍| 2835/3000 [3:53:46<13:29, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:43:29.622\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2836/3000 [3:53:50<12:33, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:43:33.488\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2837/3000 [3:53:54<12:02, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:43:37.543\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2838/3000 [3:53:59<11:57, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:43:41.957\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2839/3000 [3:54:03<12:02, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:43:46.586\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2840/3000 [3:54:08<11:51, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": 
[ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:43:50.936\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2841/3000 [3:54:13<12:09, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:43:55.859\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2842/3000 [3:54:17<12:14, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:44:00.643\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2843/3000 [3:54:21<11:35, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:44:04.565\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2844/3000 [3:54:25<10:57, 4.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:44:08.278\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2845/3000 [3:54:30<11:29, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:44:13.278\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2846/3000 [3:54:34<11:16, 4.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:44:17.542\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2847/3000 [3:54:38<10:49, 4.25s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:44:21.439\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2848/3000 [3:54:43<11:12, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:44:26.291\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▍| 2849/3000 [3:54:47<10:57, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:44:30.470\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2850/3000 [3:54:52<10:53, 4.36s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:44:34.842\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2851/3000 [3:54:56<10:38, 4.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:44:38.967\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2852/3000 [3:55:00<10:36, 4.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:44:43.301\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2853/3000 [3:55:04<10:30, 4.29s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:44:47.551\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2854/3000 [3:55:08<10:16, 4.22s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:44:51.627\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2855/3000 [3:55:13<10:17, 4.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:44:55.974\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2856/3000 [3:55:18<10:49, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:45:01.061\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2857/3000 [3:55:23<10:53, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:45:05.774\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2858/3000 [3:55:28<11:03, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:45:10.698\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2859/3000 [3:55:32<10:53, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:45:15.227\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2860/3000 [3:55:36<10:15, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:45:19.072\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2861/3000 [3:55:41<10:43, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:45:24.253\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2862/3000 [3:55:47<11:40, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:45:30.364\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 95%|█████████▌| 2863/3000 [3:55:52<11:35, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:45:35.450\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647080.288776843)])']\n", "connector: \n", "Unclosed client session\n", 
"client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647090.366834175)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647101.27365911)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647095.790663377)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647074.644035077)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647085.405493538)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647111.311205476)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647121.937867264)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647131.911965405)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647142.900387453)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647127.369277347)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647136.657925622)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647116.690238704)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647153.820443471)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647163.764608392)])']\n", "connector: \n", 
"Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647173.790053666)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647183.923390364)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647159.239755093)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647168.807046432)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647179.282704983)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647193.768783888)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647204.243833364)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647213.358147757)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647224.36558074)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647199.598858813)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647208.971547666)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647219.32504449)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647233.927288076)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", 
"Unclosed connector\n", "connections: ['deque([(, 1647244.438102883)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647254.814794931)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647265.46279806)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647238.927319857)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647249.741853463)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647259.959191291)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647270.433316048)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647275.513392573)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647286.594213754)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647296.784570909)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647306.442558684)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647280.554423997)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647291.960323959)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647301.915464452)])']\n", "connector: \n", "Unclosed client 
session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647317.430078998)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647327.837384377)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647337.750178344)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647347.062855808)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647323.005912014)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647332.286959652)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647342.278063231)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647356.60991709)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647367.17670381)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647376.784874862)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647386.307032872)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647361.229554177)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647372.040792508)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647381.388638632)])']\n", 
"connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647395.454406962)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647404.954966221)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647414.246599822)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647423.59358169)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647400.296977294)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647409.196764697)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647418.735469149)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647433.406848639)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647444.43853073)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647454.121953261)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647464.88682731)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647438.127719888)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647449.383190936)])']\n", 
"connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647459.896232165)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647475.826808037)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647485.489516184)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647494.409435221)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647502.942573983)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647480.49300888)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647498.714459747)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647490.269506295)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647512.258360599)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647523.304864898)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647533.736146498)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647543.303854066)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647517.667658853)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647528.941296221)])']\n", "connector: \n", 
"Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647538.626087301)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647553.479463916)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647564.569176143)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647574.683563878)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647584.229189914)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647557.692903646)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647569.820492831)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647579.845981051)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647594.862783926)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647604.168782618)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647616.590360495)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647627.214619964)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: 
['deque([(, 1647599.414831648)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647610.328030195)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647621.67986281)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647637.619400165)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647648.46170526)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647659.182953691)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647669.637407904)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647642.754894819)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647654.133771736)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647665.218007402)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647678.919816992)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647689.256079027)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647699.755697602)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647714.503664327)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed 
connector\n", "connections: ['deque([(, 1647684.425940822)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647694.899471939)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647709.015352322)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647725.531340704)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647735.227950256)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647746.243636966)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647756.033279857)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647730.540729231)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647740.535532187)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647750.862108643)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647766.012076689)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647776.251591492)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647785.343142134)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647795.003368245)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client 
session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647771.307313706)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647780.64596828)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647789.891463814)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647804.866166973)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647815.887509222)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647824.916628938)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647835.552346161)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647809.63977347)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647820.643315602)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647830.337090036)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647845.782200902)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647855.262565309)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647865.171866831)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647875.315681547)])']\n", "connector: \n", "Unclosed client session\n", 
"client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647850.775904606)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647860.333604015)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647869.809058069)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647887.469529245)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647897.074842718)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647908.945017958)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647920.340636798)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647892.31246876)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647903.945859527)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647914.717528511)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647931.397182209)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647941.860182002)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647954.033595621)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", 
"connections: ['deque([(, 1647964.155762623)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647936.53927803)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647947.715412675)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647958.499177923)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647974.557974962)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647984.637212031)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647993.97466845)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648004.553897407)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647979.725121577)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647989.158938835)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647999.42952245)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648014.851141415)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648024.079438499)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648034.861571569)])']\n", "connector: \n", "Unclosed client session\n", "client_session: 
\n", "Unclosed connector\n", "connections: ['deque([(, 1648045.063821489)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648019.438094697)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648039.890575872)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648029.14426642)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648055.488141698)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648065.837583879)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648076.859475112)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648087.174396263)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648060.597654578)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648071.203964433)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648082.584269785)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", 
"client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647106.509875185)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647148.630965075)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647188.919503118)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647228.962111649)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647311.75057821)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647351.458497225)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647390.708268191)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647428.601294928)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647470.446032622)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647507.712612455)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647548.305798766)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647588.969083876)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647632.436792201)])']\n", 
"connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647674.435453523)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647720.279841973)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647761.094719668)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647799.736205469)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647840.746378354)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647880.774568527)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1647925.864076226)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1648009.435851494)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648050.745722884)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648092.359010584)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1647968.863191656)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648099.340639911)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648110.78136525)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648115.759261846)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648120.255414278)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648125.401337065)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1648134.278518654)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648129.562332438)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648139.563699625)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648104.154949882)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648144.509968254)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648149.593513898)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648159.303032682)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648163.870861128)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648169.691236139)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648174.815789959)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648154.834037256)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648179.098144504)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648184.929925806)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648190.208212518)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed 
connector\n", "connections: ['deque([(, 1648195.167479108)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648200.686906112)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648213.042725118)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648223.006695742)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648228.255196676)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648207.118134884)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1648217.794640947)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648233.786932485)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648238.580506038)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648247.994120049)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648252.546316266)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648257.819822098)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648262.534325204)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1648267.076652637)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648243.621308586)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648271.550205915)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648276.851597765)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648280.716177963)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648285.205857695)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648294.011295636)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648302.937952388)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648289.516546516)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648298.454920326)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648307.482404086)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648312.247902197)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648327.464903996)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648331.55054268)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed 
connector\n", "connections: ['deque([(, 1648335.488382208)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648343.218730557)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648347.206609455)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648318.446277914)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648339.433040642)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648323.199864603)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648355.662343994)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1648360.319425405)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648364.03985307)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648373.026968739)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648368.436111719)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648377.867348782)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648351.400188731)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 
1648383.115153391)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648387.124563543)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648391.614510654)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648395.851604373)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648404.662550723)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648413.149684033)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648417.504412845)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648400.111315097)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648408.894158554)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648422.488237582)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648431.304493133)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648439.837878039)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648448.058827436)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648452.112911399)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client 
session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648427.305783625)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648434.798976026)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648443.825637749)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648456.754604823)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648465.476795151)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648473.755326189)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648481.783876228)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648486.38852179)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648461.178829745)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648469.326785765)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648477.794741405)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648490.929340323)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648499.191682889)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1648507.380093156)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648515.393129528)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648519.84994471)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648494.88283232)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648503.606503764)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648511.225452902)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648523.608406489)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648531.884094225)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648540.495497643)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648548.893351699)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648553.081935218)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648527.974153367)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648536.039626317)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648544.78774522)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed 
connector\n", "connections: ['deque([(, 1648557.55517088)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648566.089635397)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648575.926984824)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648583.808111984)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648588.476841145)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648561.955567845)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648571.647668102)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648579.880779392)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648594.265684165)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1648603.856499976)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648612.395531089)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648621.381324181)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648626.200310723)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed 
client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648598.521388419)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648608.238671931)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648616.831021983)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648630.431838198)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648638.776470834)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648647.892481288)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648658.736879444)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648663.636220165)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648634.827067704)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648643.269929039)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648652.582894164)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648667.971129066)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648675.745527064)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648685.473764248)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", 
"connections: ['deque([(, 1648694.614697067)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648698.852625146)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648672.070963419)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648679.908901329)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648690.264890335)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648702.973735031)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648711.019424608)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648720.314220713)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648731.083380588)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648735.213925469)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1648706.924839959)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648726.950757835)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648715.85947672)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648740.005964871)])']\n", 
"connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648748.443993069)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648758.259995222)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648767.776574875)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648772.353240817)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648744.60642357)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648753.596249928)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648762.887629163)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648776.88357508)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648785.231627565)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648806.824527774)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648815.298739956)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648819.69672513)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648781.330375919)])']\n", "connector: 
\n", "Unclosed connector\n", "connections: ['deque([(, 1648790.398420568)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648811.293605676)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648825.201360783)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648835.202311622)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648844.738308965)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648854.873291884)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648859.089965637)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648830.299717839)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648839.956305683)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648850.390041276)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648863.642977221)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648873.022592393)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648882.654652518)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648891.251433687)])']\n", "connector: \n", 
"Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648895.933218269)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648868.26993915)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648877.791566775)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648887.243524796)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648899.582081122)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648908.44642111)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648916.523838591)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648925.925899035)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648904.125699918)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648912.368501118)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648921.122419529)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648930.186467143)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648934.711235775)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1648943.691867249)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648953.016258436)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648961.719306778)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648965.538178395)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648939.562221892)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648948.234289232)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648957.308084533)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648970.046495491)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648978.436418107)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648987.876289302)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1648998.33629904)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649003.022282981)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1648974.326787278)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 
1648982.931283173)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1648993.756168171)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649007.845579742)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649018.551264231)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649028.220627252)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649037.172624619)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649041.194845832)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649014.382694607)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649023.31572752)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649032.045149411)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649045.438805927)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649054.049763347)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649067.34693572)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649076.725727161)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1649081.660844425)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649050.015838261)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649063.322351547)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649071.797703396)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649086.429700537)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649095.362622216)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649104.818732651)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649114.66446665)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649119.001142531)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649090.792428328)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649099.715250665)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649110.008963298)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649123.583055923)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649133.743030679)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed 
client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649143.696661076)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649152.343358205)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649156.617121814)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649128.87478302)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649137.913047933)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649148.077718795)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649161.220903316)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649169.853973713)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649179.106670085)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649188.746434988)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649193.440496809)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649165.639114314)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649174.82878332)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1649184.273609391)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649196.95603389)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649206.344759681)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649214.597611416)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649224.946838336)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649201.669429734)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649210.251952923)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649220.270855738)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649229.279103861)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649233.638780386)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649242.601207865)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649251.381639727)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649260.723274667)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1649265.327681107)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649246.414283064)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1649255.841540915)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649238.927776805)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649269.892261202)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649278.947063257)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649287.330018335)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649295.549819914)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649299.628089802)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649283.283997054)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649291.622136577)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649274.280730359)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649303.458964048)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649310.921887122)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1649320.945577985)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649329.905143195)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649334.090160403)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649307.157604405)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649316.549624126)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649326.161713072)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649337.982579657)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649346.708205093)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649354.604960587)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649363.77586053)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649368.480406396)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649342.794138202)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649350.64491872)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649359.600056575)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed 
connector\n", "connections: ['deque([(, 1649372.944849738)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649381.887236012)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1649390.432847067)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649399.173261144)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649377.440794788)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649385.693135577)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649403.259131837)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649394.583422002)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649407.056347112)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649416.780263912)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649427.087016211)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649436.901914501)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1649440.980648852)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649411.462393671)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649421.861556777)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649432.414736197)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649445.635811961)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649454.179700928)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649462.404922572)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649470.841937903)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649476.294621012)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649450.224888671)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649466.688347724)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649458.339667812)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649480.93760417)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649490.180368161)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649499.975235608)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", 
"connections: ['deque([(, 1649508.605974953)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649512.845855055)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649494.948580908)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1649504.353994137)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649485.805570497)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649517.693278591)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649526.653990039)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649535.26697246)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649547.368539512)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649551.885519057)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649521.595126499)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649531.10651232)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649539.921878136)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649556.075188019)])']\n", 
"connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649565.123942175)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649574.251548464)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649582.788112215)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649587.180257885)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649560.643298567)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649569.958801851)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649578.496664512)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649591.426940684)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649599.603462975)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649608.212779643)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649616.72040024)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649622.293190074)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649595.448114593)])']\n", "connector: 
\n", "Unclosed connector\n", "connections: ['deque([(, 1649603.771837521)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649612.237628049)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649626.914664062)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649635.166056724)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1649643.530687006)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649651.457991948)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649656.717712067)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649639.164986527)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649647.736470796)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649631.115087517)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649661.78055486)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649669.886285088)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649679.22138548)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649687.345847635)])']\n", "connector: \n", "Unclosed 
client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649691.903301862)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649665.62417927)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649674.302077445)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649683.524986152)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649696.173307665)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649705.755445284)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649714.515239124)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649723.374818127)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649727.443608219)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649701.202166025)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649710.238817806)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649719.067304502)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649732.301671986)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1649741.718612157)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649751.330070013)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649760.680479188)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649765.341099416)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649736.699329703)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1649746.303869758)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649755.926558629)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649771.335636835)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649780.513082078)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649789.968565232)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649798.492519904)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649802.985905823)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649776.306951933)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 
1649785.146785867)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649793.803409582)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649807.870499588)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649818.657079767)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649828.415458167)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649837.374209489)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649841.466422306)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649824.144205598)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649832.989223479)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649812.645223608)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649846.079997582)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649856.279785755)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649865.273087275)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649874.407007186)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1649878.824070584)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649860.619191774)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649870.381177183)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649852.210870432)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649883.127805853)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649887.300235409)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649893.782948238)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649898.079710781)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649907.192459441)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649917.404144179)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649922.066823661)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649902.7332946)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649911.979983179)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649935.373689839)])']\n", "connector: \n", "Unclosed 
client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649939.318876898)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649947.580958947)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649951.615130997)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649926.265380858)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649931.295876626)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649943.601240888)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649955.78668172)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649959.751907528)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649964.158458749)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649969.94156844)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649978.968740256)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649988.329344265)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649992.652339929)])']\n", "connector: \n", "Unclosed connector\n", 
"connections: ['deque([(, 1649974.937523016)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1649983.901134873)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1649997.847244164)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650007.175140682)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650016.756187487)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650042.201075461)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650012.342485722)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650037.345705544)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1650048.381870882)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650002.450679001)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650053.299380938)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650062.245256092)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650071.716730963)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650081.677548099)])']\n", "connector: \n", "Unclosed client session\n", 
"client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650086.362443823)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650057.47059654)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650067.059017598)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650076.760179999)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650091.187432163)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650100.047099876)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650109.5230738)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650119.385596622)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650124.051024222)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650095.579501697)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650104.852435523)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650114.65873677)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650128.89512825)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650137.753441658)])']\n", "connector: \n", 
"Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650148.528478519)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650157.368179992)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650153.101600721)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650161.661609158)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650133.315875999)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650143.434235662)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650165.49895594)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650175.902095866)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1650186.807574455)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650195.257754364)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650199.593080598)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650170.096392445)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650181.52679584)])']\n", "connector: \n", 
"Unclosed connector\n", "connections: ['deque([(, 1650191.135325158)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650205.766745868)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650216.120089531)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650225.089994476)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650234.327654448)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650238.744483034)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650211.259230598)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650220.434790987)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650229.689344065)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650243.717213489)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650247.768607247)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650251.812388942)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650256.418041845)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650266.298864202)])']\n", "connector: \n", "Unclosed client 
session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650275.589305326)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650280.406502094)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650260.584176743)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650271.207675895)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650293.335616506)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650298.298061041)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650306.739213945)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650302.878380739)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650311.573033812)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650285.090291085)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650289.237753948)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650316.282303391)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650320.968320778)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1650325.397135199)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650334.017396957)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650342.47572897)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650351.5621463)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650356.259721917)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650347.332170474)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650338.386529038)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650360.840874456)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650371.664105715)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650381.646334885)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650390.15119616)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650395.85023418)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650365.427908935)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650376.615084089)])']\n", "connector: \n", "Unclosed connector\n", 
"connections: ['deque([(, 1650385.532704583)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650400.724133862)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650409.645147611)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650417.419867903)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650427.882457833)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650433.527727934)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650405.280597852)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650421.846463078)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650413.211065719)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650438.304723421)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650448.690440361)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650461.354916462)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650469.642450271)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", 
"client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650474.537985795)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650443.944755548)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650455.808400863)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650465.600767401)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650478.570051323)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650488.129031653)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650497.110467194)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650505.267755175)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650509.885842562)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650492.721051674)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650501.497564864)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650482.737021204)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650514.720643069)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650523.094944893)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 
1650532.917251327)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650543.442990873)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650548.226206311)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650519.015279496)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650527.42660818)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650538.185368663)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650552.812412877)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650561.724250303)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650570.559962539)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650579.949993976)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650584.839699026)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650557.007141922)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1650566.069889736)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650574.931857799)])']\n", "connector: \n", "Unclosed 
client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650589.426117817)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650598.260451812)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650607.33681314)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650616.047666425)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650621.252316538)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650594.117615428)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650611.339032667)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650603.266106702)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650625.405073995)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650634.716519923)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650644.934080194)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650655.346214246)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650659.566106734)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client 
session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650630.122115436)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650640.478590492)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650650.846598983)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650665.005373122)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650674.149372196)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650683.407880989)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650692.579822215)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650696.893261559)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650669.298211388)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650688.129349052)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650678.315046303)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650701.071794564)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650709.563817237)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1650718.336485504)])']\n", "connector: \n", "Unclosed client session\n", 
"client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650726.54357355)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650730.48570091)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650705.202563432)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650713.619810236)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650722.657100128)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650734.551102691)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650742.843416371)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650751.872261697)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650760.319311925)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650764.618620529)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650738.994083789)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650746.969415925)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650756.390298426)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650769.145527243)])']\n", "connector: \n", 
"Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650778.181718191)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650786.566237548)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650797.006864201)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650801.327282649)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650774.034214051)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650792.556175913)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650782.817480812)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650805.996695497)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650815.610978332)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650824.733294289)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650833.876407931)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650838.424999509)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650810.935291149)])']\n", "connector: \n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1650820.195678299)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650829.003298508)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650842.251199335)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650850.245747646)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650858.393772386)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650869.336480803)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650873.899481478)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650846.168655275)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650854.411638152)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650864.88959797)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650878.258064555)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650887.067679857)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650895.821911674)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650905.54120297)])']\n", "connector: \n", "Unclosed client 
session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650910.541958997)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650882.749400968)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650891.246918233)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650901.040370917)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650915.19885148)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650923.712073365)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650932.849619494)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650941.533846627)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650945.99264595)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650919.088710011)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650928.258123171)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650937.436423704)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650949.611557397)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650958.370439219)])']\n", 
"connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650966.979533609)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650976.691451044)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650981.299514329)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650971.395748159)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650953.887674435)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650963.098952553)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650986.165250856)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1650994.395452077)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1651003.571985416)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1651012.736985143)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1651017.298839491)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650990.133734458)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1650998.748597817)])']\n", "connector: \n", "Unclosed client session\n", 
"client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1651008.67297756)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1651021.989195439)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1651031.577173106)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1651040.950110931)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1651051.099315326)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1651055.380658822)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1651026.594340155)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1651036.441910262)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1651045.79839186)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1651059.733977445)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1651068.520672446)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1651077.55121896)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1651087.199985999)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1651091.990412879)])']\n", "connector: \n", 
"Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1651064.169950694)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1651072.410322823)])']\n", "connector: \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unclosed connector\n", "connections: ['deque([(, 1651082.497457991)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1651098.748894364)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1651107.593637517)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1651116.942721718)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1651125.725256815)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1651103.611121703)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1651112.658624908)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1651121.693981398)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1651131.016591267)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1651135.207574431)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1651144.515844478)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", 
"Unclosed connector\n", "connections: ['deque([(, 1651153.841420513)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1651163.045031835)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1651139.590451677)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1651149.223562763)])']\n", "connector: \n", "Unclosed connector\n", "connections: ['deque([(, 1651167.85196466)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1651158.606784076)])']\n", "connector: \n", "Unclosed client session\n", "client_session: \n", "Unclosed connector\n", "connections: ['deque([(, 1651172.867640136)])']\n", "connector: \n", "Evaluating workflow: 95%|█████████▌| 2864/3000 [3:55:58<11:48, 5.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:45:40.964\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2865/3000 [3:56:02<11:04, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:45:45.211\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: 
['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2866/3000 [3:56:07<10:53, 4.87s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:45:49.979\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2867/3000 [3:56:12<11:10, 5.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:45:55.404\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2868/3000 [3:56:17<11:06, 5.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:46:00.466\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2869/3000 [3:56:21<10:21, 4.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics 
{'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:46:04.510\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2870/3000 [3:56:26<09:58, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:46:08.773\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2871/3000 [3:56:30<09:58, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:46:13.502\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2872/3000 [3:56:35<10:06, 4.74s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:46:18.468\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe 
workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2873/3000 [3:56:40<10:15, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:46:23.563\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2874/3000 [3:56:45<09:56, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:46:28.031\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2875/3000 [3:56:49<09:36, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:46:32.353\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2876/3000 [3:56:54<09:29, 4.59s/it]" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:46:36.910\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2877/3000 [3:56:58<09:12, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:46:41.175\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2878/3000 [3:57:02<08:58, 4.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:46:45.388\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2879/3000 [3:57:08<09:32, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:46:50.860\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m 
- \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2880/3000 [3:57:12<09:04, 4.54s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:46:54.958\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2881/3000 [3:57:17<09:17, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:46:59.993\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2882/3000 [3:57:21<08:57, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:47:04.237\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2883/3000 [3:57:26<08:57, 4.60s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:47:08.928\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2884/3000 [3:57:31<09:28, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:47:14.534\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2885/3000 [3:57:36<09:08, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:47:19.005\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2886/3000 [3:57:41<09:08, 4.82s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:47:23.926\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▌| 2887/3000 [3:57:45<08:59, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:47:28.601\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▋| 2888/3000 [3:57:49<08:23, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:47:32.442\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▋| 2889/3000 [3:57:54<08:18, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:47:36.935\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▋| 2890/3000 [3:57:58<08:16, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:47:41.500\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▋| 2891/3000 [3:58:02<08:00, 4.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:47:45.655\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▋| 2892/3000 [3:58:07<08:04, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:47:50.336\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▋| 2893/3000 [3:58:12<08:19, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:47:55.424\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▋| 2894/3000 [3:58:17<08:10, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:47:59.939\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 96%|█████████▋| 2895/3000 [3:58:21<08:02, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:48:04.479\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2896/3000 [3:58:25<07:44, 4.47s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:48:08.637\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2897/3000 [3:58:30<07:49, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:48:13.396\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2898/3000 [3:58:34<07:34, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:48:17.609\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2899/3000 [3:58:39<07:41, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:48:22.438\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2900/3000 [3:58:44<07:38, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 1.0}\n", "\u001b[32m2026-01-05 13:48:27.083\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2901/3000 [3:58:49<07:38, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:48:31.827\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2902/3000 [3:58:52<07:08, 4.38s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:48:35.595\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2903/3000 [3:58:58<07:34, 4.68s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:48:40.989\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2904/3000 [3:59:02<07:27, 4.66s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:48:45.601\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2905/3000 [3:59:08<08:02, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:48:51.657\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2906/3000 [3:59:12<07:27, 4.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:48:55.658\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2907/3000 [3:59:16<06:56, 4.48s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": 
[ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:48:59.488\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2908/3000 [3:59:21<06:53, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:49:04.011\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2909/3000 [3:59:25<06:39, 4.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:49:08.148\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2910/3000 [3:59:29<06:31, 4.35s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:49:12.422\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2911/3000 [3:59:34<06:32, 4.41s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:49:16.968\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2912/3000 [3:59:38<06:20, 4.33s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:49:21.100\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2913/3000 [3:59:42<06:07, 4.23s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:49:25.103\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2914/3000 [3:59:47<06:34, 4.59s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:49:30.523\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2915/3000 [3:59:52<06:21, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:49:34.795\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2916/3000 [3:59:56<06:04, 4.34s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:49:38.796\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2917/3000 [4:00:01<06:23, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:49:44.067\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2918/3000 [4:00:05<06:05, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:49:48.128\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2919/3000 [4:00:09<06:00, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:49:52.559\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2920/3000 [4:00:13<05:41, 4.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:49:56.391\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2921/3000 [4:00:19<06:03, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:50:01.787\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2922/3000 [4:00:23<05:50, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:50:06.032\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2923/3000 [4:00:27<05:36, 4.37s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:50:10.096\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 97%|█████████▋| 2924/3000 [4:00:32<05:47, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:50:15.162\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2925/3000 [4:00:37<05:44, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:50:19.796\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2926/3000 [4:00:41<05:43, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:50:24.553\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2927/3000 [4:00:46<05:38, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:50:29.186\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2928/3000 [4:00:50<05:28, 4.57s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:50:33.578\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2929/3000 [4:00:55<05:16, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:50:37.773\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2930/3000 [4:00:59<05:21, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:50:42.680\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2931/3000 [4:01:05<05:29, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 1.0}\n", "\u001b[32m2026-01-05 13:50:47.864\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2932/3000 [4:01:09<05:17, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:50:52.291\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2933/3000 [4:01:14<05:20, 4.79s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:50:57.361\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2934/3000 [4:01:19<05:09, 4.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:51:01.827\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2935/3000 [4:01:23<04:59, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:51:06.243\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2936/3000 [4:01:28<04:54, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:51:10.824\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2937/3000 [4:01:32<04:44, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:51:15.130\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2938/3000 [4:01:36<04:32, 4.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": 
[ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:51:19.229\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2939/3000 [4:01:41<04:30, 4.43s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:51:23.760\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2940/3000 [4:01:46<04:40, 4.67s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:51:28.992\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2941/3000 [4:01:53<05:12, 5.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:51:35.750\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2942/3000 [4:01:57<04:45, 4.92s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:51:39.781\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2943/3000 [4:02:00<04:21, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:51:43.580\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2944/3000 [4:02:05<04:15, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:51:48.096\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2945/3000 [4:02:10<04:13, 4.60s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:51:52.799\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2946/3000 [4:02:14<04:05, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:51:57.213\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2947/3000 [4:02:19<04:00, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:52:01.709\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2948/3000 [4:02:23<03:54, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:52:06.182\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2949/3000 [4:02:29<04:06, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:52:11.742\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2950/3000 [4:02:33<04:01, 4.83s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:52:16.561\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2951/3000 [4:02:38<03:46, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:52:20.733\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2952/3000 [4:02:42<03:39, 4.58s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:52:25.184\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2953/3000 [4:02:47<03:34, 4.56s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:52:29.698\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2954/3000 [4:02:53<03:49, 4.99s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:52:35.712\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 98%|█████████▊| 2955/3000 [4:02:57<03:41, 4.91s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:52:40.432\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▊| 2956/3000 [4:03:02<03:30, 4.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:52:44.887\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▊| 2957/3000 [4:03:06<03:18, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:52:49.137\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▊| 2958/3000 [4:03:11<03:14, 4.64s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:52:53.838\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▊| 2959/3000 [4:03:24<04:59, 7.30s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:53:07.335\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▊| 2960/3000 [4:03:29<04:23, 6.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:53:12.258\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▊| 2961/3000 [4:03:34<03:51, 5.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:53:16.714\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▊| 2962/3000 [4:03:39<03:36, 5.69s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 1.0}\n", "\u001b[32m2026-01-05 13:53:21.820\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2963/3000 [4:03:43<03:14, 5.24s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:53:26.014\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2964/3000 [4:03:48<03:09, 5.26s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:53:31.307\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2965/3000 [4:03:53<02:57, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:53:35.966\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2966/3000 [4:03:58<02:52, 5.08s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:53:41.039\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2967/3000 [4:04:09<03:50, 6.98s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:53:52.469\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2968/3000 [4:04:14<03:24, 6.39s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:53:57.471\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2969/3000 [4:04:19<02:58, 5.75s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": 
[ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:54:01.715\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2970/3000 [4:04:22<02:34, 5.16s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:54:05.493\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2971/3000 [4:04:27<02:22, 4.90s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:54:09.812\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2972/3000 [4:04:31<02:12, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:54:14.126\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - 
\u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2973/3000 [4:04:36<02:07, 4.71s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:54:18.782\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2974/3000 [4:04:40<02:00, 4.62s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:54:23.195\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2975/3000 [4:04:45<01:56, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:54:27.917\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2976/3000 [4:04:49<01:50, 4.59s/it]" ] }, { "name": 
"stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:54:32.364\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2977/3000 [4:04:53<01:42, 4.45s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:54:36.476\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2978/3000 [4:04:58<01:41, 4.61s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:54:41.465\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2979/3000 [4:05:03<01:36, 4.59s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:54:46.020\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2980/3000 [4:05:07<01:30, 4.53s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:54:50.410\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2981/3000 [4:05:11<01:23, 4.42s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:54:54.557\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2982/3000 [4:05:16<01:22, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:54:59.573\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2983/3000 [4:05:21<01:16, 4.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:55:03.863\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 99%|█████████▉| 2984/3000 [4:05:25<01:12, 4.55s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:55:08.531\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2985/3000 [4:05:30<01:07, 4.49s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:55:12.875\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2986/3000 [4:05:35<01:04, 4.60s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:55:17.728\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2987/3000 [4:05:39<00:58, 4.51s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:55:22.027\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2988/3000 [4:05:44<00:55, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:55:26.938\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2989/3000 [4:05:49<00:52, 4.80s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:55:32.147\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 
'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2990/3000 [4:05:54<00:48, 4.84s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:55:37.069\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2991/3000 [4:05:59<00:42, 4.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n", "\u001b[32m2026-01-05 13:55:41.699\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2992/3000 [4:06:04<00:39, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:55:47.136\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2993/3000 [4:06:09<00:34, 4.97s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 
'acc': 1.0}\n", "\u001b[32m2026-01-05 13:55:52.082\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2994/3000 [4:06:14<00:30, 5.05s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:55:57.331\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2995/3000 [4:06:19<00:24, 4.88s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:56:01.801\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2996/3000 [4:06:23<00:18, 4.65s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:56:05.930\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains 
isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2997/3000 [4:06:28<00:14, 4.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:56:10.851\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2998/3000 [4:06:32<00:09, 4.63s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:56:15.256\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "Evaluating workflow: 100%|█████████▉| 2999/3000 [4:06:36<00:04, 4.40s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "\u001b[32m2026-01-05 13:56:19.114\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36m_validate_workflow_structure\u001b[0m:\u001b[36m363\u001b[0m - \u001b[33m\u001b[1mThe workflow contains isolated nodes: ['generate_answer', 'validate_predictions7073']\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Evaluating workflow: 100%|██████████| 3000/3000 [4:06:40<00:00, 4.93s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n", "Evaluation metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.7756666666666666}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "\n", "optimizer.evaluator.dataname = 'hotpotqa'\n", "optimizer.optimize(dataset=benchmark,provided_scorer=True)\n", "optimizer.restore_best_graph()\n", "optimizer.save(\"./debug/save_10_noreason_reploge.json\")\n", "\n", "# evaluate the optimized SEW workflow\n", "\n", "optimizer.evaluator.dataname = 'hotpotqa'\n", "with suppress_logger_info():\n", " metrics = optimizer.evaluate(dataset=benchmark, eval_mode=\"test\")\n", "print(\"Evaluation metrics: \", metrics)" ] }, { "cell_type": "code", "execution_count": null, "id": "5be75be2", "metadata": {}, "outputs": [], "source": [ "optimizer.restore_best_graph()" ] }, { "cell_type": "code", "execution_count": 11, "id": "7fb24789", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'f1': 0.0, 'em': 0.0, 'acc': 0.7756666666666666}" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "metrics" ] }, { "cell_type": "code", "execution_count": 30, "id": "60a3d289", "metadata": {}, "outputs": [], "source": [ "outdict = []\n", "for key in optimizer.evaluator._evaluation_records.keys():\n", " dict_out = optimizer.evaluator._evaluation_records[key]\n", " outdict.append({'prediction':dict_out['prediction'], 'label':dict_out['label'], 'metrics':dict_out['metrics']})" ] }, { "cell_type": "code", "execution_count": 35, "id": "001fb58c", "metadata": {}, "outputs": [], "source": [ "import pickle\n", "file_path = 'reploge_out_stat.pkl' # Files commonly use .pkl or .pickle extension\n", "\n", "with open(file_path, 'wb') as file:\n", " pickle.dump(outdict, file)" ] }, { "cell_type": "code", "execution_count": 32, "id": "fb9cd5bd", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Yes No\n", "**Final Output: Yes** No\n", "**Final Output:**\n", "Yes No\n", 
"No Yes\n", "Yes No\n", "**Final Output: Yes** No\n", "The answer is not validated. Yes\n", "Yes No\n", "No Yes\n", "**Final Output: Yes** No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "The final output that meets the Workflow Goal is:\n", "\n", "**Answer:** No Yes\n", "Yes No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "```xml\n", "Yes\n", "``` No\n", "Yes No\n", "Yes No\n", "The final output that meets the Workflow Goal is:\n", "\n", "**No** Yes\n", "**Final Output: Yes** No\n", "Yes No\n", "The final answer is: \"The answer provided has not been validated successfully, indicating it may be incorrect.\" Yes\n", "Yes No\n", "```xml\n", "Yes\n", "``` No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "The final output that meets the Workflow Goal is:\n", "\n", "\"Further clarification or information is needed to address the issue with the validated answer.\" Yes\n", "The validated answer is \"No.\" Yes\n", "```xml\n", "Yes\n", "``` No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "The validated answer is \"No.\" Yes\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "**Final Output: Yes** No\n", "**Final Answer:** Yes No\n", "The final output that meets the Workflow Goal is:\n", "\n", "**No** Yes\n", "**Final Output: Yes** No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "**Final Output: Yes** No\n", "The final output that meets the Workflow Goal is:\n", "\n", "**Answer:** No Yes\n", "No Yes\n", "**Final Output:**\n", "Yes No\n", "The final answer is: \"No\" Yes\n", "Yes No\n", "Yes No\n", "```xml\n", "Yes\n", "``` No\n", "No validation of the answer has been confirmed. 
Yes\n", "Yes No\n", "Yes No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "```xml\n", "Yes\n", "``` No\n", "**Final Output:**\n", "Yes No\n", "**Final Output: Yes** No\n", "Yes No\n", "The final answer is: \"No\" Yes\n", "No Yes\n", "Yes No\n", "Yes No\n", "Yes No\n", "**Final Output: Yes** No\n", "**Final Output:**\n", "Yes No\n", "**Final Output: Yes** No\n", "**Final Output:**\n", "Yes No\n", "**Final Output: Yes** No\n", "Yes No\n", "**Final Output: Yes** No\n", "**Final Output:**\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "Yes No\n", "```xml\n", "Yes\n", "``` No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "```xml\n", "Yes\n", "``` No\n", "Yes No\n", "Yes No\n", "The final answer is: \"No\" Yes\n", "Yes No\n", "The final answer is: \"No\" Yes\n", "Yes No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "The final answer is: \"No\" Yes\n", "**Final Output: Yes** No\n", "Yes No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "No Yes\n", "**Final Output:**\n", "Yes No\n", "The final output that meets the Workflow Goal is:\n", "\n", "**No** Yes\n", "Yes No\n", "The final answer is: \"No\" Yes\n", "No Yes\n", "Yes No\n", "**Final Output: Yes** No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "No Yes\n", "**Final Output: Yes** No\n", "Yes No\n", "Yes No\n", "Yes No\n", "No Yes\n", "The final answer is: \"No\" Yes\n", "Yes No\n", "**Final Output: Yes** No\n", "Yes No\n", "Yes No\n", "```xml\n", "Yes\n", "``` No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "The final output that meets the Workflow Goal is:\n", "\n", "**Answer:** No Yes\n", "**Final Output:**\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "No Yes\n", "Yes No\n", "**Final Output:**\n", "Yes 
No\n", "**Final Output:**\n", "Yes No\n", "The final output that meets the Workflow Goal is:\n", "\n", "**Answer:** No Yes\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "The final output that meets the Workflow Goal is:\n", "\n", "**The validated answer is \"No.\"** Yes\n", "**Final Output:**\n", "Yes No\n", "The final output that meets the Workflow Goal is:\n", "\n", "**Answer:** No Yes\n", "Yes No\n", "The final answer is: \"No\" Yes\n", "Yes No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "Yes No\n", "The validated answer is \"No.\" Yes\n", "Yes No\n", "**Final Answer:** Yes No\n", "**Final Output:**\n", "Yes No\n", "**Final Output: Yes** No\n", "**Final Output: Yes** No\n", "**Final Answer:** Yes No\n", "Yes No\n", "The final answer is: \"No.\" Yes\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "**Final Output: Yes** No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "**Final Output: Yes** No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "The final answer is: \"No\" Yes\n", "Yes No\n", "The answer is not validated. Yes\n", "**Final Output: Yes** No\n", "**Final Output: Yes** No\n", "**Final Output:**\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "The final output based on the workflow goal is:\n", "\n", "**Final Answer:** No validation was achieved. 
Yes\n", "**Final Output:**\n", "Yes No\n", "The final output that meets the Workflow Goal is:\n", "\n", "**The validated answer is \"No.\"** Yes\n", "Yes No\n", "The final answer is: \"No\" Yes\n", "The validated answer is \"No.\" Yes\n", "Yes No\n", "**Final Output: Yes** No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "The final answer is: \"No\" Yes\n", "Yes No\n", "Yes No\n", "**Final Answer:** Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "```xml\n", "Since the validated answer is \"Yes\", it indicates that whatever question or context was provided has been confirmed as true or correct. I will output this confirmation in the required XML format.\n", "Yes\n", "``` No\n", "No Yes\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "No Yes\n", "Yes No\n", "Yes No\n", "Yes No\n", "**Final Output: Yes** No\n", "Yes No\n", "Yes No\n", "```xml\n", "Yes\n", "``` No\n", "```xml\n", "Yes\n", "``` No\n", "No Yes\n", "The answer is not validated. 
No\n", "The final output that meets the Workflow Goal is:\n", "\n", "**No** Yes\n", "The final output is: \n", "\n", "**No** Yes\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "The final answer is: \"No\" Yes\n", "Yes No\n", "The final output based on the workflow goal is:\n", "\n", "**Final Answer:** No Yes\n", "```xml\n", "Yes\n", "``` No\n", "The final answer is: \"No\" Yes\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "**Final Answer:** Yes No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "Yes No\n", "The validated answer is \"No.\" Yes\n", "The final output that meets the Workflow Goal is:\n", "\n", "**Answer:** No Yes\n", "Yes No\n", "The validated answer is \"No.\" Yes\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "**Final Output: Yes** No\n", "Yes No\n", "**Final Answer:** Yes No\n", "No Yes\n", "Yes No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "No Yes\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "The final output based on the workflow goal is:\n", "\n", "**No** Yes\n", "**Final Output:**\n", "Yes No\n", "```xml\n", "Yes\n", "``` No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "**Final Answer:** Yes No\n", "The final output based on the workflow goal is:\n", "\n", "**Final Answer:** No Yes\n", "Yes No\n", "Yes No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "**Final Output: Yes** No\n", "Yes No\n", "Yes No\n", "**Final Output: Yes** No\n", "Yes No\n", "Yes No\n", "**Final Output: Yes** No\n", "Yes No\n", "The final output that meets the Workflow Goal is:\n", "\n", "**No** Yes\n", "Yes No\n", "The validated answer is \"No.\" Yes\n", "Yes No\n", "**Final Output: Yes** No\n", "Yes No\n", "**Final Output: Yes** No\n", "Yes No\n", "Yes No\n", "**Final 
Output:**\n", "Yes No\n", "The final output that meets the Workflow Goal is:\n", "\n", "**No** Yes\n", "Yes No\n", "The validated answer is \"No.\" Yes\n", "Yes No\n", "Yes No\n", "**Final Output: Yes** No\n", "**Final Answer:** Yes No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "The final output that meets the Workflow Goal is:\n", "\n", "**Answer:** No Yes\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "**Final Output: Yes** No\n", "Yes No\n", "The validated answer is \"No.\" Yes\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "Yes No\n", "The final output that meets the Workflow Goal is:\n", "\n", "**Answer:** No Yes\n", "**Final Output:**\n", "Yes No\n", "No Yes\n", "The validated answer is \"No.\" Yes\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "Yes No\n", "The final output based on the workflow goal is:\n", "\n", "**Answer:** No Yes\n", "No Yes\n", "Yes No\n", "The final output that meets the Workflow Goal is:\n", "\n", "**Answer:** No Yes\n", "Yes No\n", "No Yes\n", "```xml\n", "Yes\n", "``` No\n", "The validated answer is \"No.\" Yes\n", "Yes No\n", "Yes No\n", "**Final Answer:** Yes No\n", "**Final Output:**\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "The validated answer is \"No.\" Yes\n", "Yes No\n", "Yes No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "The validated answer is \"No.\" Yes\n", "**Final Output:**\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "No Yes\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "Yes No\n", "**Final Output: Yes** No\n", "Yes No\n", "Yes No\n", "**Final Output: Yes** No\n", "The final output that meets the Workflow Goal is:\n", "\n", "**The answer is not validated.** Yes\n", "The final output that meets the Workflow Goal is:\n", "\n", "**Answer:** No Yes\n", "No Yes\n", "**Final Output:**\n", "Yes No\n", "**Final Output:**\n", 
"Yes No\n", "The final output is: \n", "\n", "**No** Yes\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "The answer is not validated. Yes\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "No Yes\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "**Final Output: Yes** No\n", "```xml\n", "Yes\n", "``` No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "No Yes\n", "No Yes\n", "The final output based on the workflow goal is:\n", "\n", "**Answer:** No Yes\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "**Final Answer:** Yes No\n", "The final answer is: \"No\" Yes\n", "The final output that meets the Workflow Goal is:\n", "\n", "**No** Yes\n", "**Final Output: Yes** No\n", "**Final Output:**\n", "Yes No\n", "The final output that meets the Workflow Goal is:\n", "\n", "**Final Answer:** No Yes\n", "Yes No\n", "```xml\n", "Yes\n", "``` No\n", "```xml\n", "Since the validated answer provided is \"Yes\", it indicates a positive affirmation or agreement with a question or statement. I will encapsulate this response in the required XML format.\n", "Yes\n", "``` No\n", "The final output based on the workflow goal is:\n", "\n", "\"The provided answer has not been validated and may be incorrect.\" Yes\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "**Final Answer:** Yes No\n", "The answer is not validated. 
No\n", "Yes No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "Yes No\n", "The validated answer is \"No.\" Yes\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "Yes No\n", "**Final Output: Yes** No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "**Final Answer:** Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "```xml\n", "Yes\n", "``` No\n", "Yes No\n", "The validated answer is \"No.\" Yes\n", "**Final Output:**\n", "Yes No\n", "**Final Output: Yes** No\n", "Yes No\n", "The final output that meets the Workflow Goal is:\n", "\n", "**Answer:** No Yes\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "The final answer is: \"No\" Yes\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "```xml\n", "Yes\n", "``` No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "The final output that meets the Workflow Goal is:\n", "\n", "**No** Yes\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "**Final Output: Yes** No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "Yes No\n", "```xml\n", "Yes\n", "``` No\n", "The final answer is: \"The previous answer was deemed incorrect. 
Please provide additional details or clarify your question for better assistance.\" Yes\n", "Yes No\n", "Yes No\n", "The final output that meets the Workflow Goal is:\n", "\n", "**Answer:** No Yes\n", "The final answer is: \"No.\" Yes\n", "**Final Output:**\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "The validated answer is \"No.\" Yes\n", "The final answer is: \"No.\" Yes\n", "Yes No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "```xml\n", "Yes\n", "``` No\n", "Yes No\n", "The final output that meets the Workflow Goal is:\n", "\n", "**No** Yes\n", "Yes No\n", "Yes No\n", "The validated answer is \"No.\" Yes\n", "The final output that meets the Workflow Goal is:\n", "\n", "**No** Yes\n", "The final output that meets the Workflow Goal is:\n", "\n", "**Answer:** No Yes\n", "Yes No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "The final answer is: \"No\" Yes\n", "**Final Output:**\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "The final output that meets the Workflow Goal is:\n", "\n", "**No** Yes\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "The final answer is: \"No\" Yes\n", "The validated answer is \"No.\" Yes\n", "The validated answer is \"No.\" Yes\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "Yes No\n", "No Yes\n", "**Final Output:**\n", "Yes No\n", "**Final Output: Yes** No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "The final answer is: \"The answer provided is not validated.\" No\n", "The final answer is: \"No\" Yes\n", "Yes No\n", "Yes No\n", "The final output that meets the Workflow Goal is:\n", "\n", "**No** Yes\n", "**Final Output:**\n", "Yes No\n", "The validated answer is \"No.\" Yes\n", "Yes No\n", "Yes No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "No Yes\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "**Final Output:**\n", "Yes 
No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "The final answer is: \"No\" Yes\n", "Yes No\n", "Yes No\n", "The final output that meets the Workflow Goal is:\n", "\n", "**No** Yes\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "```xml\n", "Yes\n", "``` No\n", "Yes No\n", "Yes No\n", "**Final Output:**\n", "Yes No\n", "Yes No\n", "Yes No\n", "The final output that meets the Workflow Goal is:\n", "\n", "**No** Yes\n", "**Final Output:**\n", "Yes No\n", "**Final Answer:** Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "The answer is not validated. Yes\n", "Yes No\n", "**Final Output: Yes** No\n", "**Final Output:** Yes No\n", "Yes No\n", "Yes No\n", "**Final Answer:** Yes No\n", "Yes No\n", "```xml\n", "Yes\n", "``` No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "Yes No\n", "The validated answer is \"No.\" Yes\n", "Yes No\n", "The answer provided has not been validated. 
Yes\n" ] } ], "source": [ "# Print every record scored acc == 0 (prediction, then label) and collect\n", "# the labels of those records so they can be tallied afterwards.\n", "count_list = []\n", "for record in outdict:\n", "    if record['metrics']['acc'] == 0:\n", "        print(record['prediction'], record['label'])\n", "        count_list.append(record['label'])" ] }, { "cell_type": "code", "execution_count": 33, "id": "3e7557b8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 \n", "No 492\n", "Yes 116\n", "Name: count, dtype: int64" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "pd.DataFrame(count_list).value_counts()" ] }, { "cell_type": "code", "execution_count": 34, "id": "8671e0c9", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3000" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(benchmark._test_data)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.13" } }, "nbformat": 4, "nbformat_minor": 5 }